polars-df 0.20.0-x86_64-darwin → 0.21.1-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +192 -186
- data/LICENSE-THIRD-PARTY.txt +1431 -1810
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/3.4/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +130 -32
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +12 -2
- data/lib/polars/data_frame.rb +834 -48
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +61 -5
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +1247 -211
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +127 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +19 -1
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +70 -66
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +1099 -95
- data/lib/polars/list_expr.rb +400 -11
- data/lib/polars/list_name_space.rb +321 -5
- data/lib/polars/meta_expr.rb +71 -22
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +84 -3
- data/lib/polars/selector.rb +210 -0
- data/lib/polars/selectors.rb +932 -203
- data/lib/polars/series.rb +1083 -63
- data/lib/polars/string_expr.rb +435 -9
- data/lib/polars/string_name_space.rb +729 -45
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +9 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +10 -0
- metadata +12 -2
data/lib/polars/series.rb
CHANGED
@@ -287,7 +287,7 @@ module Polars
|
|
287
287
|
self != other
|
288
288
|
end
|
289
289
|
|
290
|
-
# Method equivalent of equality operator `series != other` where `
|
290
|
+
# Method equivalent of inequality operator `series != other` where `nil == nil`.
|
291
291
|
#
|
292
292
|
# This differs from the standard `ne` where null values are propagated.
|
293
293
|
#
|
@@ -407,7 +407,7 @@ module Polars
|
|
407
407
|
# @return [Series]
|
408
408
|
def !
|
409
409
|
if dtype == Boolean
|
410
|
-
return Utils.wrap_s(_s.
|
410
|
+
return Utils.wrap_s(_s.not_)
|
411
411
|
end
|
412
412
|
raise NotImplementedError
|
413
413
|
end
|
@@ -447,7 +447,7 @@ module Polars
|
|
447
447
|
item = len + item
|
448
448
|
end
|
449
449
|
|
450
|
-
return _s.
|
450
|
+
return _s.get_index(item)
|
451
451
|
end
|
452
452
|
|
453
453
|
if item.is_a?(Range)
|
@@ -496,6 +496,37 @@ module Polars
|
|
496
496
|
end
|
497
497
|
end
|
498
498
|
|
499
|
+
# Return the Series as a scalar, or return the element at the given index.
|
500
|
+
#
|
501
|
+
# If no index is provided, this is equivalent to `s[0]`, with a check
|
502
|
+
# that the shape is (1,). With an index, this is equivalent to `s[index]`.
|
503
|
+
#
|
504
|
+
# @return [Object]
|
505
|
+
#
|
506
|
+
# @example
|
507
|
+
# s1 = Polars::Series.new("a", [1])
|
508
|
+
# s1.item
|
509
|
+
# # => 1
|
510
|
+
#
|
511
|
+
# @example
|
512
|
+
# s2 = Polars::Series.new("a", [9, 8, 7])
|
513
|
+
# s2.cum_sum.item(-1)
|
514
|
+
# # => 24
|
515
|
+
def item(index = nil)
|
516
|
+
if index.nil?
|
517
|
+
if len != 1
|
518
|
+
msg = (
|
519
|
+
"can only call '.item' if the Series is of length 1," +
|
520
|
+
" or an explicit index is provided (Series is of length #{len})"
|
521
|
+
)
|
522
|
+
raise ArgumentError, msg
|
523
|
+
end
|
524
|
+
return _s.get_index(0)
|
525
|
+
end
|
526
|
+
|
527
|
+
_s.get_index_signed(index)
|
528
|
+
end
|
529
|
+
|
499
530
|
# Return an estimation of the total (heap) allocated size of the Series.
|
500
531
|
#
|
501
532
|
# Estimated size is given in the specified unit (bytes by default).
|
@@ -543,7 +574,26 @@ module Polars
|
|
543
574
|
# # 1.732051
|
544
575
|
# # ]
|
545
576
|
def sqrt
|
546
|
-
|
577
|
+
super
|
578
|
+
end
|
579
|
+
|
580
|
+
# Compute the cube root of the elements.
|
581
|
+
#
|
582
|
+
# @return [Series]
|
583
|
+
#
|
584
|
+
# @example
|
585
|
+
# s = Polars::Series.new([1, 2, 3])
|
586
|
+
# s.cbrt
|
587
|
+
# # =>
|
588
|
+
# # shape: (3,)
|
589
|
+
# # Series: '' [f64]
|
590
|
+
# # [
|
591
|
+
# # 1.0
|
592
|
+
# # 1.259921
|
593
|
+
# # 1.44225
|
594
|
+
# # ]
|
595
|
+
def cbrt
|
596
|
+
super
|
547
597
|
end
|
548
598
|
|
549
599
|
# Check if any boolean value in the column is `true`.
|
@@ -563,7 +613,7 @@ module Polars
|
|
563
613
|
# # => false
|
564
614
|
def any?(ignore_nulls: true, &block)
|
565
615
|
if block_given?
|
566
|
-
apply(skip_nulls: ignore_nulls, &block).any?
|
616
|
+
apply(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).any?
|
567
617
|
else
|
568
618
|
_s.any(ignore_nulls)
|
569
619
|
end
|
@@ -587,7 +637,7 @@ module Polars
|
|
587
637
|
# # => true
|
588
638
|
def all?(ignore_nulls: true, &block)
|
589
639
|
if block_given?
|
590
|
-
apply(skip_nulls: ignore_nulls, &block).all?
|
640
|
+
apply(return_dtype: Boolean, skip_nulls: ignore_nulls, &block).all?
|
591
641
|
else
|
592
642
|
_s.all(ignore_nulls)
|
593
643
|
end
|
@@ -611,7 +661,7 @@ module Polars
|
|
611
661
|
# # => true
|
612
662
|
def none?(&block)
|
613
663
|
if block_given?
|
614
|
-
apply(&block).none?
|
664
|
+
apply(return_dtype: Boolean, &block).none?
|
615
665
|
else
|
616
666
|
to_frame.select(Polars.col(name).is_not.all).to_series[0]
|
617
667
|
end
|
@@ -640,6 +690,25 @@ module Polars
|
|
640
690
|
super
|
641
691
|
end
|
642
692
|
|
693
|
+
# Compute the natural logarithm of the input array plus one, element-wise.
|
694
|
+
#
|
695
|
+
# @return [Series]
|
696
|
+
#
|
697
|
+
# @example
|
698
|
+
# s = Polars::Series.new([1, 2, 3])
|
699
|
+
# s.log1p
|
700
|
+
# # =>
|
701
|
+
# # shape: (3,)
|
702
|
+
# # Series: '' [f64]
|
703
|
+
# # [
|
704
|
+
# # 0.693147
|
705
|
+
# # 1.098612
|
706
|
+
# # 1.386294
|
707
|
+
# # ]
|
708
|
+
def log1p
|
709
|
+
super
|
710
|
+
end
|
711
|
+
|
643
712
|
# Compute the base 10 logarithm of the input array, element-wise.
|
644
713
|
#
|
645
714
|
# @return [Series]
|
@@ -875,6 +944,44 @@ module Polars
|
|
875
944
|
to_frame.select(Polars.col(name).product).to_series[0]
|
876
945
|
end
|
877
946
|
|
947
|
+
# Raise to the power of the given exponent.
|
948
|
+
#
|
949
|
+
# If the exponent is float, the result follows the dtype of exponent.
|
950
|
+
# Otherwise, it follows dtype of base.
|
951
|
+
#
|
952
|
+
# @param exponent [Numeric]
|
953
|
+
# The exponent. Accepts Series input.
|
954
|
+
#
|
955
|
+
# @return [Series]
|
956
|
+
#
|
957
|
+
# @example Raising integers to positive integers results in integers:
|
958
|
+
# s = Polars::Series.new("foo", [1, 2, 3, 4])
|
959
|
+
# s.pow(3)
|
960
|
+
# # =>
|
961
|
+
# # shape: (4,)
|
962
|
+
# # Series: 'foo' [i64]
|
963
|
+
# # [
|
964
|
+
# # 1
|
965
|
+
# # 8
|
966
|
+
# # 27
|
967
|
+
# # 64
|
968
|
+
# # ]
|
969
|
+
#
|
970
|
+
# @example In order to raise integers to negative integers, you can cast either the base or the exponent to float:
|
971
|
+
# s.pow(-3.0)
|
972
|
+
# # =>
|
973
|
+
# # shape: (4,)
|
974
|
+
# # Series: 'foo' [f64]
|
975
|
+
# # [
|
976
|
+
# # 1.0
|
977
|
+
# # 0.125
|
978
|
+
# # 0.037037
|
979
|
+
# # 0.015625
|
980
|
+
# # ]
|
981
|
+
def pow(exponent)
|
982
|
+
to_frame.select_seq(F.col(name).pow(exponent)).to_series
|
983
|
+
end
|
984
|
+
|
878
985
|
# Get the minimal value in this Series.
|
879
986
|
#
|
880
987
|
# @return [Object]
|
@@ -1004,6 +1111,13 @@ module Polars
|
|
1004
1111
|
|
1005
1112
|
# Get dummy variables.
|
1006
1113
|
#
|
1114
|
+
# @param separator [String]
|
1115
|
+
# Separator/delimiter used when generating column names.
|
1116
|
+
# @param drop_first [Boolean]
|
1117
|
+
# Remove the first category from the variable being encoded.
|
1118
|
+
# @param drop_nulls [Boolean]
|
1119
|
+
# If there are `nil` values in the series, a `null` column is not generated.
|
1120
|
+
#
|
1007
1121
|
# @return [DataFrame]
|
1008
1122
|
#
|
1009
1123
|
# @example
|
@@ -1020,8 +1134,8 @@ module Polars
|
|
1020
1134
|
# # │ 0 ┆ 1 ┆ 0 │
|
1021
1135
|
# # │ 0 ┆ 0 ┆ 1 │
|
1022
1136
|
# # └─────┴─────┴─────┘
|
1023
|
-
def to_dummies(separator: "_", drop_first: false)
|
1024
|
-
Utils.wrap_df(_s.to_dummies(separator, drop_first))
|
1137
|
+
def to_dummies(separator: "_", drop_first: false, drop_nulls: false)
|
1138
|
+
Utils.wrap_df(_s.to_dummies(separator, drop_first, drop_nulls))
|
1025
1139
|
end
|
1026
1140
|
|
1027
1141
|
# Bin continuous values into discrete categories.
|
@@ -1093,8 +1207,8 @@ module Polars
|
|
1093
1207
|
|
1094
1208
|
# Bin continuous values into discrete categories based on their quantiles.
|
1095
1209
|
#
|
1096
|
-
# @param quantiles [
|
1097
|
-
# Either
|
1210
|
+
# @param quantiles [Object]
|
1211
|
+
# Either an array of quantile probabilities between 0 and 1 or a positive
|
1098
1212
|
# integer determining the number of bins with uniform probability.
|
1099
1213
|
# @param labels [Array]
|
1100
1214
|
# Names of the categories. The number of labels must be equal to the number
|
@@ -1230,10 +1344,76 @@ module Polars
|
|
1230
1344
|
super
|
1231
1345
|
end
|
1232
1346
|
|
1347
|
+
# Bin values into buckets and count their occurrences.
|
1348
|
+
#
|
1349
|
+
# @note
|
1350
|
+
# This functionality is considered **unstable**. It may be changed
|
1351
|
+
# at any point without it being considered a breaking change.
|
1352
|
+
#
|
1353
|
+
# @param bins [Object]
|
1354
|
+
# Bin edges. If nil given, we determine the edges based on the data.
|
1355
|
+
# @param bin_count [Integer]
|
1356
|
+
# If `bins` is not provided, `bin_count` uniform bins are created that fully
|
1357
|
+
# encompass the data.
|
1358
|
+
# @param include_category [Boolean]
|
1359
|
+
# Include a column that shows the intervals as categories.
|
1360
|
+
# @param include_breakpoint [Boolean]
|
1361
|
+
# Include a column that indicates the upper breakpoint.
|
1362
|
+
#
|
1363
|
+
# @return [DataFrame]
|
1364
|
+
#
|
1365
|
+
# @example
|
1366
|
+
# a = Polars::Series.new("a", [1, 3, 8, 8, 2, 1, 3])
|
1367
|
+
# a.hist(bin_count: 4)
|
1368
|
+
# # =>
|
1369
|
+
# # shape: (4, 3)
|
1370
|
+
# # ┌────────────┬─────────────┬───────┐
|
1371
|
+
# # │ breakpoint ┆ category ┆ count │
|
1372
|
+
# # │ --- ┆ --- ┆ --- │
|
1373
|
+
# # │ f64 ┆ cat ┆ u32 │
|
1374
|
+
# # ╞════════════╪═════════════╪═══════╡
|
1375
|
+
# # │ 2.75 ┆ [1.0, 2.75] ┆ 3 │
|
1376
|
+
# # │ 4.5 ┆ (2.75, 4.5] ┆ 2 │
|
1377
|
+
# # │ 6.25 ┆ (4.5, 6.25] ┆ 0 │
|
1378
|
+
# # │ 8.0 ┆ (6.25, 8.0] ┆ 2 │
|
1379
|
+
# # └────────────┴─────────────┴───────┘
|
1380
|
+
def hist(
|
1381
|
+
bins: nil,
|
1382
|
+
bin_count: nil,
|
1383
|
+
include_category: true,
|
1384
|
+
include_breakpoint: true
|
1385
|
+
)
|
1386
|
+
out = (
|
1387
|
+
to_frame
|
1388
|
+
.select_seq(
|
1389
|
+
F.col(name).hist(
|
1390
|
+
bins: bins,
|
1391
|
+
bin_count: bin_count,
|
1392
|
+
include_category: include_category,
|
1393
|
+
include_breakpoint: include_breakpoint
|
1394
|
+
)
|
1395
|
+
)
|
1396
|
+
.to_series
|
1397
|
+
)
|
1398
|
+
if !include_breakpoint && !include_category
|
1399
|
+
out.to_frame
|
1400
|
+
else
|
1401
|
+
out.struct.unnest
|
1402
|
+
end
|
1403
|
+
end
|
1404
|
+
|
1233
1405
|
# Count the unique values in a Series.
|
1234
1406
|
#
|
1235
1407
|
# @param sort [Boolean]
|
1236
1408
|
# Ensure the output is sorted from most values to least.
|
1409
|
+
# @param parallel [Boolean]
|
1410
|
+
# Execute the computation in parallel.
|
1411
|
+
# @param name [String]
|
1412
|
+
# Give the resulting count column a specific name; if `normalize` is
|
1413
|
+
# true this defaults to "proportion", otherwise defaults to "count".
|
1414
|
+
# @param normalize [Boolean]
|
1415
|
+
# If true, the count is returned as the relative frequency of unique
|
1416
|
+
# values normalized to 1.0.
|
1237
1417
|
#
|
1238
1418
|
# @return [DataFrame]
|
1239
1419
|
#
|
@@ -1451,6 +1631,29 @@ module Polars
|
|
1451
1631
|
end
|
1452
1632
|
alias_method :cumsum, :cum_sum
|
1453
1633
|
|
1634
|
+
# Return the cumulative count of the non-null values in the column.
|
1635
|
+
#
|
1636
|
+
# @param reverse [Boolean]
|
1637
|
+
# Reverse the operation.
|
1638
|
+
#
|
1639
|
+
# @return [Series]
|
1640
|
+
#
|
1641
|
+
# @example
|
1642
|
+
# s = Polars::Series.new(["x", "k", nil, "d"])
|
1643
|
+
# s.cum_count
|
1644
|
+
# # =>
|
1645
|
+
# # shape: (4,)
|
1646
|
+
# # Series: '' [u32]
|
1647
|
+
# # [
|
1648
|
+
# # 1
|
1649
|
+
# # 2
|
1650
|
+
# # 2
|
1651
|
+
# # 3
|
1652
|
+
# # ]
|
1653
|
+
def cum_count(reverse: false)
|
1654
|
+
super
|
1655
|
+
end
|
1656
|
+
|
1454
1657
|
# Get an array with the cumulative min computed at every element.
|
1455
1658
|
#
|
1456
1659
|
# @param reverse [Boolean]
|
@@ -1524,29 +1727,6 @@ module Polars
|
|
1524
1727
|
end
|
1525
1728
|
alias_method :cumprod, :cum_prod
|
1526
1729
|
|
1527
|
-
# Get the first `n` rows.
|
1528
|
-
#
|
1529
|
-
# Alias for {#head}.
|
1530
|
-
#
|
1531
|
-
# @param n [Integer]
|
1532
|
-
# Number of rows to return.
|
1533
|
-
#
|
1534
|
-
# @return [Series]
|
1535
|
-
#
|
1536
|
-
# @example
|
1537
|
-
# s = Polars::Series.new("a", [1, 2, 3])
|
1538
|
-
# s.limit(2)
|
1539
|
-
# # =>
|
1540
|
-
# # shape: (2,)
|
1541
|
-
# # Series: 'a' [i64]
|
1542
|
-
# # [
|
1543
|
-
# # 1
|
1544
|
-
# # 2
|
1545
|
-
# # ]
|
1546
|
-
def limit(n = 10)
|
1547
|
-
to_frame.select(F.col(name).limit(n)).to_series
|
1548
|
-
end
|
1549
|
-
|
1550
1730
|
# Get a slice of this Series.
|
1551
1731
|
#
|
1552
1732
|
# @param offset [Integer]
|
@@ -1700,13 +1880,41 @@ module Polars
|
|
1700
1880
|
to_frame.select(F.col(name).tail(n)).to_series
|
1701
1881
|
end
|
1702
1882
|
|
1883
|
+
# Get the first `n` rows.
|
1884
|
+
#
|
1885
|
+
# Alias for {#head}.
|
1886
|
+
#
|
1887
|
+
# @param n [Integer]
|
1888
|
+
# Number of rows to return.
|
1889
|
+
#
|
1890
|
+
# @return [Series]
|
1891
|
+
#
|
1892
|
+
# @example
|
1893
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1894
|
+
# s.limit(2)
|
1895
|
+
# # =>
|
1896
|
+
# # shape: (2,)
|
1897
|
+
# # Series: 'a' [i64]
|
1898
|
+
# # [
|
1899
|
+
# # 1
|
1900
|
+
# # 2
|
1901
|
+
# # ]
|
1902
|
+
def limit(n = 10)
|
1903
|
+
to_frame.select(F.col(name).limit(n)).to_series
|
1904
|
+
end
|
1905
|
+
|
1703
1906
|
# Take every nth value in the Series and return as new Series.
|
1704
1907
|
#
|
1908
|
+
# @param n [Integer]
|
1909
|
+
# Gather every *n*-th row.
|
1910
|
+
# @param offset [Integer]
|
1911
|
+
# Start the row index at this offset.
|
1912
|
+
#
|
1705
1913
|
# @return [Series]
|
1706
1914
|
#
|
1707
1915
|
# @example
|
1708
1916
|
# s = Polars::Series.new("a", [1, 2, 3, 4])
|
1709
|
-
# s.
|
1917
|
+
# s.gather_every(2)
|
1710
1918
|
# # =>
|
1711
1919
|
# # shape: (2,)
|
1712
1920
|
# # Series: 'a' [i64]
|
@@ -1714,14 +1922,29 @@ module Polars
|
|
1714
1922
|
# # 1
|
1715
1923
|
# # 3
|
1716
1924
|
# # ]
|
1717
|
-
|
1925
|
+
#
|
1926
|
+
# @example
|
1927
|
+
# s.gather_every(2, 1)
|
1928
|
+
# # =>
|
1929
|
+
# # shape: (2,)
|
1930
|
+
# # Series: 'a' [i64]
|
1931
|
+
# # [
|
1932
|
+
# # 2
|
1933
|
+
# # 4
|
1934
|
+
# # ]
|
1935
|
+
def gather_every(n, offset = 0)
|
1718
1936
|
super
|
1719
1937
|
end
|
1938
|
+
alias_method :take_every, :gather_every
|
1720
1939
|
|
1721
1940
|
# Sort this Series.
|
1722
1941
|
#
|
1723
1942
|
# @param reverse [Boolean]
|
1724
1943
|
# Reverse sort.
|
1944
|
+
# @param nulls_last [Boolean]
|
1945
|
+
# Place null values last instead of first.
|
1946
|
+
# @param multithreaded [Boolean]
|
1947
|
+
# Sort using multiple threads.
|
1725
1948
|
# @param in_place [Boolean]
|
1726
1949
|
# Sort in place.
|
1727
1950
|
#
|
@@ -1780,6 +2003,44 @@ module Polars
|
|
1780
2003
|
super
|
1781
2004
|
end
|
1782
2005
|
|
2006
|
+
# Return the `k` largest elements of the `by` column.
|
2007
|
+
#
|
2008
|
+
# Non-null elements are always preferred over null elements, regardless of
|
2009
|
+
# the value of `reverse`. The output is not guaranteed to be in any
|
2010
|
+
# particular order, call `sort` after this function if you wish the
|
2011
|
+
# output to be sorted.
|
2012
|
+
#
|
2013
|
+
# @param by [Object]
|
2014
|
+
# Column used to determine the largest elements.
|
2015
|
+
# Accepts expression input. Strings are parsed as column names.
|
2016
|
+
# @param k [Integer]
|
2017
|
+
# Number of elements to return.
|
2018
|
+
# @param reverse [Object]
|
2019
|
+
# Consider the `k` smallest elements of the `by` column (instead of the `k`
|
2020
|
+
# largest). This can be specified per column by passing a sequence of
|
2021
|
+
# booleans.
|
2022
|
+
#
|
2023
|
+
# @return [Series]
|
2024
|
+
#
|
2025
|
+
# @example
|
2026
|
+
# s = Polars::Series.new("a", [2, 5, 1, 4, 3])
|
2027
|
+
# s.top_k_by("a", k: 3)
|
2028
|
+
# # =>
|
2029
|
+
# # shape: (3,)
|
2030
|
+
# # Series: 'a' [i64]
|
2031
|
+
# # [
|
2032
|
+
# # 5
|
2033
|
+
# # 4
|
2034
|
+
# # 3
|
2035
|
+
# # ]
|
2036
|
+
def top_k_by(
|
2037
|
+
by,
|
2038
|
+
k: 5,
|
2039
|
+
reverse: false
|
2040
|
+
)
|
2041
|
+
super
|
2042
|
+
end
|
2043
|
+
|
1783
2044
|
# Return the `k` smallest elements.
|
1784
2045
|
#
|
1785
2046
|
# @param k [Integer]
|
@@ -1802,6 +2063,44 @@ module Polars
|
|
1802
2063
|
super
|
1803
2064
|
end
|
1804
2065
|
|
2066
|
+
# Return the `k` smallest elements of the `by` column.
|
2067
|
+
#
|
2068
|
+
# Non-null elements are always preferred over null elements, regardless of
|
2069
|
+
# the value of `reverse`. The output is not guaranteed to be in any
|
2070
|
+
# particular order, call `sort` after this function if you wish the
|
2071
|
+
# output to be sorted.
|
2072
|
+
#
|
2073
|
+
# @param by [Object]
|
2074
|
+
# Column used to determine the smallest elements.
|
2075
|
+
# Accepts expression input. Strings are parsed as column names.
|
2076
|
+
# @param k [Integer]
|
2077
|
+
# Number of elements to return.
|
2078
|
+
# @param reverse [Object]
|
2079
|
+
# Consider the `k` largest elements of the `by` column (instead of the `k`
|
2080
|
+
# smallest). This can be specified per column by passing a sequence of
|
2081
|
+
# booleans.
|
2082
|
+
#
|
2083
|
+
# @return [Series]
|
2084
|
+
#
|
2085
|
+
# @example
|
2086
|
+
# s = Polars::Series.new("a", [2, 5, 1, 4, 3])
|
2087
|
+
# s.bottom_k_by("a", k: 3)
|
2088
|
+
# # =>
|
2089
|
+
# # shape: (3,)
|
2090
|
+
# # Series: 'a' [i64]
|
2091
|
+
# # [
|
2092
|
+
# # 1
|
2093
|
+
# # 2
|
2094
|
+
# # 3
|
2095
|
+
# # ]
|
2096
|
+
def bottom_k_by(
|
2097
|
+
by,
|
2098
|
+
k: 5,
|
2099
|
+
reverse: false
|
2100
|
+
)
|
2101
|
+
super
|
2102
|
+
end
|
2103
|
+
|
1805
2104
|
# Get the index values that would sort this Series.
|
1806
2105
|
#
|
1807
2106
|
# @param reverse [Boolean]
|
@@ -1971,7 +2270,7 @@ module Polars
|
|
1971
2270
|
#
|
1972
2271
|
# @example
|
1973
2272
|
# s = Polars::Series.new("a", [1, 2, 3, 4])
|
1974
|
-
# s.
|
2273
|
+
# s.gather([1, 3])
|
1975
2274
|
# # =>
|
1976
2275
|
# # shape: (2,)
|
1977
2276
|
# # Series: 'a' [i64]
|
@@ -1979,9 +2278,10 @@ module Polars
|
|
1979
2278
|
# # 2
|
1980
2279
|
# # 4
|
1981
2280
|
# # ]
|
1982
|
-
def
|
1983
|
-
|
2281
|
+
def gather(indices)
|
2282
|
+
super
|
1984
2283
|
end
|
2284
|
+
alias_method :take, :gather
|
1985
2285
|
|
1986
2286
|
# Count the null values in this Series.
|
1987
2287
|
#
|
@@ -2028,6 +2328,48 @@ module Polars
|
|
2028
2328
|
end
|
2029
2329
|
alias_method :empty?, :is_empty
|
2030
2330
|
|
2331
|
+
# Check if the Series is sorted.
|
2332
|
+
#
|
2333
|
+
# @param descending [Boolean]
|
2334
|
+
# Check if the Series is sorted in descending order.
|
2335
|
+
# @param nulls_last [Boolean]
|
2336
|
+
# Set nulls at the end of the Series in sorted check.
|
2337
|
+
#
|
2338
|
+
# @return [Boolean]
|
2339
|
+
#
|
2340
|
+
# @example
|
2341
|
+
# s = Polars::Series.new([1, 3, 2])
|
2342
|
+
# s.is_sorted
|
2343
|
+
# # => false
|
2344
|
+
#
|
2345
|
+
# @example
|
2346
|
+
# s = Polars::Series.new([3, 2, 1])
|
2347
|
+
# s.is_sorted(descending: true)
|
2348
|
+
# # => true
|
2349
|
+
def is_sorted(descending: false, nulls_last: false)
|
2350
|
+
_s.is_sorted(descending, nulls_last)
|
2351
|
+
end
|
2352
|
+
alias_method :sorted?, :is_sorted
|
2353
|
+
|
2354
|
+
# Negate a boolean Series.
|
2355
|
+
#
|
2356
|
+
# @return [Series]
|
2357
|
+
#
|
2358
|
+
# @example
|
2359
|
+
# s = Polars::Series.new("a", [true, false, false])
|
2360
|
+
# s.not_
|
2361
|
+
# # =>
|
2362
|
+
# # shape: (3,)
|
2363
|
+
# # Series: 'a' [bool]
|
2364
|
+
# # [
|
2365
|
+
# # false
|
2366
|
+
# # true
|
2367
|
+
# # true
|
2368
|
+
# # ]
|
2369
|
+
def not_
|
2370
|
+
self.class._from_rbseries(_s.not_)
|
2371
|
+
end
|
2372
|
+
|
2031
2373
|
# Returns a boolean Series indicating which values are null.
|
2032
2374
|
#
|
2033
2375
|
# @return [Series]
|
@@ -2154,7 +2496,7 @@ module Polars
|
|
2154
2496
|
# @return [Series]
|
2155
2497
|
#
|
2156
2498
|
# @example
|
2157
|
-
# s = Polars::Series.new("a", [1, 2, 3])
|
2499
|
+
# s = Polars::Series.new("a", [[1, 2, 3]])
|
2158
2500
|
# s2 = Polars::Series.new("b", [2, 4, nil])
|
2159
2501
|
# s2.is_in(s)
|
2160
2502
|
# # =>
|
@@ -2273,6 +2615,28 @@ module Polars
|
|
2273
2615
|
end
|
2274
2616
|
alias_method :is_first, :is_first_distinct
|
2275
2617
|
|
2618
|
+
|
2619
|
+
# Return a boolean mask indicating the last occurrence of each distinct value.
|
2620
|
+
#
|
2621
|
+
# @return [Series]
|
2622
|
+
#
|
2623
|
+
# @example
|
2624
|
+
# s = Polars::Series.new([1, 1, 2, 3, 2])
|
2625
|
+
# s.is_last_distinct
|
2626
|
+
# # =>
|
2627
|
+
# # shape: (5,)
|
2628
|
+
# # Series: '' [bool]
|
2629
|
+
# # [
|
2630
|
+
# # false
|
2631
|
+
# # true
|
2632
|
+
# # false
|
2633
|
+
# # true
|
2634
|
+
# # true
|
2635
|
+
# # ]
|
2636
|
+
def is_last_distinct
|
2637
|
+
super
|
2638
|
+
end
|
2639
|
+
|
2276
2640
|
# Get mask of all duplicated values.
|
2277
2641
|
#
|
2278
2642
|
# @return [Series]
|
@@ -2490,12 +2854,130 @@ module Polars
|
|
2490
2854
|
super
|
2491
2855
|
end
|
2492
2856
|
|
2493
|
-
#
|
2857
|
+
# Get a boolean mask of the values that are between the given lower/upper bounds.
|
2494
2858
|
#
|
2495
|
-
# @
|
2859
|
+
# @param lower_bound [Object]
|
2860
|
+
# Lower bound value. Accepts expression input. Non-expression inputs
|
2861
|
+
# (including strings) are parsed as literals.
|
2862
|
+
# @param upper_bound [Object]
|
2863
|
+
# Upper bound value. Accepts expression input. Non-expression inputs
|
2864
|
+
# (including strings) are parsed as literals.
|
2865
|
+
# @param closed ['both', 'left', 'right', 'none']
|
2866
|
+
# Define which sides of the interval are closed (inclusive).
|
2496
2867
|
#
|
2497
|
-
# @
|
2498
|
-
#
|
2868
|
+
# @return [Series]
|
2869
|
+
#
|
2870
|
+
# @note
|
2871
|
+
# If the value of the `lower_bound` is greater than that of the `upper_bound`
|
2872
|
+
# then the result will be `false`, as no value can satisfy the condition.
|
2873
|
+
#
|
2874
|
+
# @example
|
2875
|
+
# s = Polars::Series.new("num", [1, 2, 3, 4, 5])
|
2876
|
+
# s.is_between(2, 4)
|
2877
|
+
# # =>
|
2878
|
+
# # shape: (5,)
|
2879
|
+
# # Series: 'num' [bool]
|
2880
|
+
# # [
|
2881
|
+
# # false
|
2882
|
+
# # true
|
2883
|
+
# # true
|
2884
|
+
# # true
|
2885
|
+
# # false
|
2886
|
+
# # ]
|
2887
|
+
#
|
2888
|
+
# @example Use the `closed` argument to include or exclude the values at the bounds:
|
2889
|
+
# s.is_between(2, 4, closed: "left")
|
2890
|
+
# # =>
|
2891
|
+
# # shape: (5,)
|
2892
|
+
# # Series: 'num' [bool]
|
2893
|
+
# # [
|
2894
|
+
# # false
|
2895
|
+
# # true
|
2896
|
+
# # true
|
2897
|
+
# # false
|
2898
|
+
# # false
|
2899
|
+
# # ]
|
2900
|
+
#
|
2901
|
+
# @example You can also use strings as well as numeric/temporal values:
|
2902
|
+
# s = Polars::Series.new("s", ["a", "b", "c", "d", "e"])
|
2903
|
+
# s.is_between("b", "d", closed: "both")
|
2904
|
+
# # =>
|
2905
|
+
# # shape: (5,)
|
2906
|
+
# # Series: 's' [bool]
|
2907
|
+
# # [
|
2908
|
+
# # false
|
2909
|
+
# # true
|
2910
|
+
# # true
|
2911
|
+
# # true
|
2912
|
+
# # false
|
2913
|
+
# # ]
|
2914
|
+
def is_between(
|
2915
|
+
lower_bound,
|
2916
|
+
upper_bound,
|
2917
|
+
closed: "both"
|
2918
|
+
)
|
2919
|
+
if closed == "none"
|
2920
|
+
out = (self > lower_bound) & (self < upper_bound)
|
2921
|
+
elsif closed == "both"
|
2922
|
+
out = (self >= lower_bound) & (self <= upper_bound)
|
2923
|
+
elsif closed == "right"
|
2924
|
+
out = (self > lower_bound) & (self <= upper_bound)
|
2925
|
+
elsif closed == "left"
|
2926
|
+
out = (self >= lower_bound) & (self < upper_bound)
|
2927
|
+
end
|
2928
|
+
|
2929
|
+
if out.is_a?(Expr)
|
2930
|
+
out = F.select(out).to_series
|
2931
|
+
end
|
2932
|
+
|
2933
|
+
out
|
2934
|
+
end
|
2935
|
+
|
2936
|
+
# Get a boolean mask of the values being close to the other values.
|
2937
|
+
#
|
2938
|
+
# @param abs_tol [Float]
|
2939
|
+
# Absolute tolerance. This is the maximum allowed absolute difference between
|
2940
|
+
# two values. Must be non-negative.
|
2941
|
+
# @param rel_tol [Float]
|
2942
|
+
# Relative tolerance. This is the maximum allowed difference between two
|
2943
|
+
# values, relative to the larger absolute value. Must be in the range [0, 1).
|
2944
|
+
# @param nans_equal [Boolean]
|
2945
|
+
# Whether NaN values should be considered equal.
|
2946
|
+
#
|
2947
|
+
# @return [Series]
|
2948
|
+
#
|
2949
|
+
# @example
|
2950
|
+
# s = Polars::Series.new("s", [1.0, 1.2, 1.4, 1.45, 1.6])
|
2951
|
+
# s.is_close(1.4, abs_tol: 0.1)
|
2952
|
+
# # =>
|
2953
|
+
# # shape: (5,)
|
2954
|
+
# # Series: 's' [bool]
|
2955
|
+
# # [
|
2956
|
+
# # false
|
2957
|
+
# # false
|
2958
|
+
# # true
|
2959
|
+
# # true
|
2960
|
+
# # false
|
2961
|
+
# # ]
|
2962
|
+
def is_close(
|
2963
|
+
other,
|
2964
|
+
abs_tol: 0.0,
|
2965
|
+
rel_tol: 1e-09,
|
2966
|
+
nans_equal: false
|
2967
|
+
)
|
2968
|
+
F.select(
|
2969
|
+
F.lit(self).is_close(
|
2970
|
+
other, abs_tol: abs_tol, rel_tol: rel_tol, nans_equal: nans_equal
|
2971
|
+
)
|
2972
|
+
).to_series
|
2973
|
+
end
|
2974
|
+
|
2975
|
+
# Check if this Series datatype is numeric.
|
2976
|
+
#
|
2977
|
+
# @return [Boolean]
|
2978
|
+
#
|
2979
|
+
# @example
|
2980
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2499
2981
|
# s.is_numeric
|
2500
2982
|
# # => true
|
2501
2983
|
def is_numeric
|
@@ -2653,23 +3135,69 @@ module Polars
|
|
2653
3135
|
end
|
2654
3136
|
alias_method :set_at_idx, :scatter
|
2655
3137
|
|
3138
|
+
# Get the index of the first occurrence of a value, or `nil` if it's not found.
|
3139
|
+
#
|
3140
|
+
# @param element [Object]
|
3141
|
+
# Value to find.
|
3142
|
+
#
|
3143
|
+
# @return [Object]
|
3144
|
+
#
|
3145
|
+
# @example
|
3146
|
+
# s = Polars::Series.new("a", [1, nil, 17])
|
3147
|
+
# s.index_of(17)
|
3148
|
+
# # => 2
|
3149
|
+
#
|
3150
|
+
# @example
|
3151
|
+
# s.index_of(nil) # search for a null
|
3152
|
+
# # => 1
|
3153
|
+
#
|
3154
|
+
# @example
|
3155
|
+
# s.index_of(55).nil?
|
3156
|
+
# # => true
|
3157
|
+
def index_of(element)
|
3158
|
+
F.select(F.lit(self).index_of(element)).item
|
3159
|
+
end
|
3160
|
+
|
2656
3161
|
# Create an empty copy of the current Series.
|
2657
3162
|
#
|
2658
3163
|
# The copy has identical name/dtype but no data.
|
2659
3164
|
#
|
3165
|
+
# @param n [Integer]
|
3166
|
+
# Number of (empty) elements to return in the cleared Series.
|
3167
|
+
#
|
2660
3168
|
# @return [Series]
|
2661
3169
|
#
|
2662
3170
|
# @example
|
2663
3171
|
# s = Polars::Series.new("a", [nil, true, false])
|
2664
|
-
# s.
|
3172
|
+
# s.clear
|
2665
3173
|
# # =>
|
2666
3174
|
# # shape: (0,)
|
2667
3175
|
# # Series: 'a' [bool]
|
2668
3176
|
# # [
|
2669
3177
|
# # ]
|
2670
|
-
|
2671
|
-
|
3178
|
+
#
|
3179
|
+
# @example
|
3180
|
+
# s.clear(n: 2)
|
3181
|
+
# # =>
|
3182
|
+
# # shape: (2,)
|
3183
|
+
# # Series: 'a' [bool]
|
3184
|
+
# # [
|
3185
|
+
# # null
|
3186
|
+
# # null
|
3187
|
+
# # ]
|
3188
|
+
def clear(n: 0)
|
3189
|
+
if n < 0
|
3190
|
+
msg = "`n` should be greater than or equal to 0, got #{n}"
|
3191
|
+
raise ArgumentError, msg
|
3192
|
+
end
|
3193
|
+
# faster path
|
3194
|
+
if n == 0
|
3195
|
+
return self.class._from_rbseries(_s.clear)
|
3196
|
+
end
|
3197
|
+
s = len > 0 ? self.class.new(name, [], dtype: dtype) : clone
|
3198
|
+
n > 0 ? s.extend_constant(nil, n) : s
|
2672
3199
|
end
|
3200
|
+
alias_method :cleared, :clear
|
2673
3201
|
|
2674
3202
|
# clone handled by initialize_copy
|
2675
3203
|
|
@@ -2748,6 +3276,30 @@ module Polars
|
|
2748
3276
|
super
|
2749
3277
|
end
|
2750
3278
|
|
3279
|
+
# Fill missing values with the next non-null value.
|
3280
|
+
#
|
3281
|
+
# This is an alias of `.fill_null(strategy: "backward")`.
|
3282
|
+
#
|
3283
|
+
# @param limit [Integer]
|
3284
|
+
# The number of consecutive null values to backward fill.
|
3285
|
+
#
|
3286
|
+
# @return [Series]
|
3287
|
+
def backward_fill(limit: nil)
|
3288
|
+
fill_null(strategy: "backward", limit: limit)
|
3289
|
+
end
|
3290
|
+
|
3291
|
+
# Fill missing values with the last non-null value.
|
3292
|
+
#
|
3293
|
+
# This is an alias of `.fill_null(strategy: "forward")`.
|
3294
|
+
#
|
3295
|
+
# @param limit [Integer]
|
3296
|
+
# The number of consecutive null values to forward fill.
|
3297
|
+
#
|
3298
|
+
# @return [Series]
|
3299
|
+
def forward_fill(limit: nil)
|
3300
|
+
fill_null(strategy: "forward", limit: limit)
|
3301
|
+
end
|
3302
|
+
|
2751
3303
|
# Rounds down to the nearest integer value.
|
2752
3304
|
#
|
2753
3305
|
# Only works on floating point Series.
|
@@ -2812,6 +3364,28 @@ module Polars
|
|
2812
3364
|
super
|
2813
3365
|
end
|
2814
3366
|
|
3367
|
+
# Round to a number of significant figures.
|
3368
|
+
#
|
3369
|
+
# @param digits [Integer]
|
3370
|
+
# Number of significant figures to round to.
|
3371
|
+
#
|
3372
|
+
# @return [Series]
|
3373
|
+
#
|
3374
|
+
# @example
|
3375
|
+
# s = Polars::Series.new([0.01234, 3.333, 3450.0])
|
3376
|
+
# s.round_sig_figs(2)
|
3377
|
+
# # =>
|
3378
|
+
# # shape: (3,)
|
3379
|
+
# # Series: '' [f64]
|
3380
|
+
# # [
|
3381
|
+
# # 0.012
|
3382
|
+
# # 3.3
|
3383
|
+
# # 3500.0
|
3384
|
+
# # ]
|
3385
|
+
def round_sig_figs(digits)
|
3386
|
+
super
|
3387
|
+
end
|
3388
|
+
|
2815
3389
|
# Compute the dot/inner product between two Series.
|
2816
3390
|
#
|
2817
3391
|
# @param other [Object]
|
@@ -2932,6 +3506,25 @@ module Polars
|
|
2932
3506
|
super
|
2933
3507
|
end
|
2934
3508
|
|
3509
|
+
# Compute the element-wise value for the cotangent.
|
3510
|
+
#
|
3511
|
+
# @return [Series]
|
3512
|
+
#
|
3513
|
+
# @example
|
3514
|
+
# s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
|
3515
|
+
# s.cot
|
3516
|
+
# # =>
|
3517
|
+
# # shape: (3,)
|
3518
|
+
# # Series: 'a' [f64]
|
3519
|
+
# # [
|
3520
|
+
# # inf
|
3521
|
+
# # 6.1232e-17
|
3522
|
+
# # -8.1656e15
|
3523
|
+
# # ]
|
3524
|
+
def cot
|
3525
|
+
super
|
3526
|
+
end
|
3527
|
+
|
2935
3528
|
# Compute the element-wise value for the inverse sine.
|
2936
3529
|
#
|
2937
3530
|
# @return [Series]
|
@@ -3132,7 +3725,7 @@ module Polars
|
|
3132
3725
|
#
|
3133
3726
|
# @example
|
3134
3727
|
# s = Polars::Series.new("a", [1, 2, 3])
|
3135
|
-
# s.map_elements { |x| x + 10 }
|
3728
|
+
# s.map_elements(return_dtype: Polars::Int64) { |x| x + 10 }
|
3136
3729
|
# # =>
|
3137
3730
|
# # shape: (3,)
|
3138
3731
|
# # Series: 'a' [i64]
|
@@ -3147,7 +3740,7 @@ module Polars
|
|
3147
3740
|
else
|
3148
3741
|
pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
3149
3742
|
end
|
3150
|
-
Utils.wrap_s(_s.
|
3743
|
+
Utils.wrap_s(_s.map_elements(func, pl_return_dtype, skip_nulls))
|
3151
3744
|
end
|
3152
3745
|
alias_method :map, :map_elements
|
3153
3746
|
alias_method :apply, :map_elements
|
@@ -3190,7 +3783,7 @@ module Polars
|
|
3190
3783
|
# @param periods [Integer]
|
3191
3784
|
# Number of places to shift (may be negative).
|
3192
3785
|
# @param fill_value [Object]
|
3193
|
-
# Fill
|
3786
|
+
# Fill nil values with the result of this expression.
|
3194
3787
|
#
|
3195
3788
|
# @return [Series]
|
3196
3789
|
def shift_and_fill(periods, fill_value)
|
@@ -3254,7 +3847,7 @@ module Polars
|
|
3254
3847
|
# elementwise with the values in the window.
|
3255
3848
|
# @param min_periods [Integer]
|
3256
3849
|
# The number of values in the window that should be non-null before computing
|
3257
|
-
# a result. If
|
3850
|
+
# a result. If nil, it will be set equal to window size.
|
3258
3851
|
# @param center [Boolean]
|
3259
3852
|
# Set the labels at the center of the window
|
3260
3853
|
#
|
@@ -3295,7 +3888,7 @@ module Polars
|
|
3295
3888
|
# elementwise with the values in the window.
|
3296
3889
|
# @param min_periods [Integer]
|
3297
3890
|
# The number of values in the window that should be non-null before computing
|
3298
|
-
# a result. If
|
3891
|
+
# a result. If nil, it will be set equal to window size.
|
3299
3892
|
# @param center [Boolean]
|
3300
3893
|
# Set the labels at the center of the window
|
3301
3894
|
#
|
@@ -3336,7 +3929,7 @@ module Polars
|
|
3336
3929
|
# elementwise with the values in the window.
|
3337
3930
|
# @param min_periods [Integer]
|
3338
3931
|
# The number of values in the window that should be non-null before computing
|
3339
|
-
# a result. If
|
3932
|
+
# a result. If nil, it will be set equal to window size.
|
3340
3933
|
# @param center [Boolean]
|
3341
3934
|
# Set the labels at the center of the window
|
3342
3935
|
#
|
@@ -3377,7 +3970,7 @@ module Polars
|
|
3377
3970
|
# elementwise with the values in the window.
|
3378
3971
|
# @param min_periods [Integer]
|
3379
3972
|
# The number of values in the window that should be non-null before computing
|
3380
|
-
# a result. If
|
3973
|
+
# a result. If nil, it will be set equal to window size.
|
3381
3974
|
# @param center [Boolean]
|
3382
3975
|
# Set the labels at the center of the window
|
3383
3976
|
#
|
@@ -3418,9 +4011,11 @@ module Polars
|
|
3418
4011
|
# elementwise with the values in the window.
|
3419
4012
|
# @param min_periods [Integer]
|
3420
4013
|
# The number of values in the window that should be non-null before computing
|
3421
|
-
# a result. If
|
4014
|
+
# a result. If nil, it will be set equal to window size.
|
3422
4015
|
# @param center [Boolean]
|
3423
4016
|
# Set the labels at the center of the window
|
4017
|
+
# @param ddof [Integer]
|
4018
|
+
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
3424
4019
|
#
|
3425
4020
|
# @return [Series]
|
3426
4021
|
#
|
@@ -3461,9 +4056,11 @@ module Polars
|
|
3461
4056
|
# elementwise with the values in the window.
|
3462
4057
|
# @param min_periods [Integer]
|
3463
4058
|
# The number of values in the window that should be non-null before computing
|
3464
|
-
# a result. If
|
4059
|
+
# a result. If nil, it will be set equal to window size.
|
3465
4060
|
# @param center [Boolean]
|
3466
4061
|
# Set the labels at the center of the window
|
4062
|
+
# @param ddof [Integer]
|
4063
|
+
# "Delta Degrees of Freedom": The divisor for a length N window is N - ddof
|
3467
4064
|
#
|
3468
4065
|
# @return [Series]
|
3469
4066
|
#
|
@@ -3503,7 +4100,7 @@ module Polars
|
|
3503
4100
|
# elementwise with the values in the window.
|
3504
4101
|
# @param min_periods [Integer]
|
3505
4102
|
# The number of values in the window that should be non-null before computing
|
3506
|
-
# a result. If
|
4103
|
+
# a result. If nil, it will be set equal to window size.
|
3507
4104
|
# @param center [Boolean]
|
3508
4105
|
# Set the labels at the center of the window
|
3509
4106
|
#
|
@@ -3545,7 +4142,7 @@ module Polars
|
|
3545
4142
|
# elementwise with the values in the window.
|
3546
4143
|
# @param min_periods [Integer]
|
3547
4144
|
# The number of values in the window that should be non-null before computing
|
3548
|
-
# a result. If
|
4145
|
+
# a result. If nil, it will be set equal to window size.
|
3549
4146
|
# @param center [Boolean]
|
3550
4147
|
# Set the labels at the center of the window
|
3551
4148
|
#
|
@@ -3560,10 +4157,10 @@ module Polars
|
|
3560
4157
|
# # [
|
3561
4158
|
# # null
|
3562
4159
|
# # null
|
3563
|
-
# # 1.0
|
3564
4160
|
# # 2.0
|
3565
4161
|
# # 3.0
|
3566
4162
|
# # 4.0
|
4163
|
+
# # 6.0
|
3567
4164
|
# # ]
|
3568
4165
|
#
|
3569
4166
|
# @example
|
@@ -3619,11 +4216,56 @@ module Polars
|
|
3619
4216
|
super
|
3620
4217
|
end
|
3621
4218
|
|
4219
|
+
# Compute a rolling kurtosis.
|
4220
|
+
#
|
4221
|
+
# @note
|
4222
|
+
# This functionality is considered **unstable**. It may be changed
|
4223
|
+
# at any point without it being considered a breaking change.
|
4224
|
+
#
|
4225
|
+
# The window at a given row will include the row itself, and the `window_size - 1`
|
4226
|
+
# elements before it.
|
4227
|
+
#
|
4228
|
+
# @param window_size [Integer]
|
4229
|
+
# Integer size of the rolling window.
|
4230
|
+
# @param fisher [Boolean]
|
4231
|
+
# If true, Fisher's definition is used (normal ==> 0.0). If false,
|
4232
|
+
# Pearson's definition is used (normal ==> 3.0).
|
4233
|
+
# @param bias [Boolean]
|
4234
|
+
# If false, the calculations are corrected for statistical bias.
|
4235
|
+
# @param min_samples [Integer]
|
4236
|
+
# The number of values in the window that should be non-null before computing
|
4237
|
+
# a result. If set to `nil` (default), it will be set equal to `window_size`.
|
4238
|
+
# @param center [Boolean]
|
4239
|
+
# Set the labels at the center of the window.
|
4240
|
+
#
|
4241
|
+
# @return [Series]
|
4242
|
+
#
|
4243
|
+
# @example
|
4244
|
+
# Polars::Series.new([1, 4, 2, 9]).rolling_kurtosis(3)
|
4245
|
+
# # =>
|
4246
|
+
# # shape: (4,)
|
4247
|
+
# # Series: '' [f64]
|
4248
|
+
# # [
|
4249
|
+
# # null
|
4250
|
+
# # null
|
4251
|
+
# # -1.5
|
4252
|
+
# # -1.5
|
4253
|
+
# # ]
|
4254
|
+
def rolling_kurtosis(
|
4255
|
+
window_size,
|
4256
|
+
fisher: true,
|
4257
|
+
bias: true,
|
4258
|
+
min_samples: nil,
|
4259
|
+
center: false
|
4260
|
+
)
|
4261
|
+
super
|
4262
|
+
end
|
4263
|
+
|
3622
4264
|
# Sample from this Series.
|
3623
4265
|
#
|
3624
4266
|
# @param n [Integer]
|
3625
4267
|
# Number of items to return. Cannot be used with `frac`. Defaults to 1 if
|
3626
|
-
# `frac` is
|
4268
|
+
# `frac` is nil.
|
3627
4269
|
# @param frac [Float]
|
3628
4270
|
# Fraction of items to return. Cannot be used with `n`.
|
3629
4271
|
# @param with_replacement [Boolean]
|
@@ -3631,7 +4273,7 @@ module Polars
|
|
3631
4273
|
# @param shuffle [Boolean]
|
3632
4274
|
# Shuffle the order of sampled data points.
|
3633
4275
|
# @param seed [Integer]
|
3634
|
-
# Seed for the random number generator. If set to
|
4276
|
+
# Seed for the random number generator. If set to nil (default), a random
|
3635
4277
|
# seed is used.
|
3636
4278
|
#
|
3637
4279
|
# @return [Series]
|
@@ -3644,7 +4286,7 @@ module Polars
|
|
3644
4286
|
# # Series: 'a' [i64]
|
3645
4287
|
# # [
|
3646
4288
|
# # 5
|
3647
|
-
# #
|
4289
|
+
# # 2
|
3648
4290
|
# # ]
|
3649
4291
|
def sample(
|
3650
4292
|
n: nil,
|
@@ -4129,6 +4771,60 @@ module Polars
|
|
4129
4771
|
super
|
4130
4772
|
end
|
4131
4773
|
|
4774
|
+
# Return the lower bound of this Series' dtype as a unit Series.
|
4775
|
+
#
|
4776
|
+
# @return [Series]
|
4777
|
+
#
|
4778
|
+
# @example
|
4779
|
+
# s = Polars::Series.new("s", [-1, 0, 1], dtype: Polars::Int32)
|
4780
|
+
# s.lower_bound
|
4781
|
+
# # =>
|
4782
|
+
# # shape: (1,)
|
4783
|
+
# # Series: 's' [i32]
|
4784
|
+
# # [
|
4785
|
+
# # -2147483648
|
4786
|
+
# # ]
|
4787
|
+
#
|
4788
|
+
# @example
|
4789
|
+
# s = Polars::Series.new("s", [1.0, 2.5, 3.0], dtype: Polars::Float32)
|
4790
|
+
# s.lower_bound
|
4791
|
+
# # =>
|
4792
|
+
# # shape: (1,)
|
4793
|
+
# # Series: 's' [f32]
|
4794
|
+
# # [
|
4795
|
+
# # -inf
|
4796
|
+
# # ]
|
4797
|
+
def lower_bound
|
4798
|
+
super
|
4799
|
+
end
|
4800
|
+
|
4801
|
+
# Return the upper bound of this Series' dtype as a unit Series.
|
4802
|
+
#
|
4803
|
+
# @return [Series]
|
4804
|
+
#
|
4805
|
+
# @example
|
4806
|
+
# s = Polars::Series.new("s", [-1, 0, 1], dtype: Polars::Int8)
|
4807
|
+
# s.upper_bound
|
4808
|
+
# # =>
|
4809
|
+
# # shape: (1,)
|
4810
|
+
# # Series: 's' [i8]
|
4811
|
+
# # [
|
4812
|
+
# # 127
|
4813
|
+
# # ]
|
4814
|
+
#
|
4815
|
+
# @example
|
4816
|
+
# s = Polars::Series.new("s", [1.0, 2.5, 3.0], dtype: Polars::Float64)
|
4817
|
+
# s.upper_bound
|
4818
|
+
# # =>
|
4819
|
+
# # shape: (1,)
|
4820
|
+
# # Series: 's' [f64]
|
4821
|
+
# # [
|
4822
|
+
# # inf
|
4823
|
+
# # ]
|
4824
|
+
def upper_bound
|
4825
|
+
super
|
4826
|
+
end
|
4827
|
+
|
4132
4828
|
# Replace values by different values.
|
4133
4829
|
#
|
4134
4830
|
# @param old [Object]
|
@@ -4201,6 +4897,120 @@ module Polars
|
|
4201
4897
|
super
|
4202
4898
|
end
|
4203
4899
|
|
4900
|
+
# Replace all values by different values.
|
4901
|
+
#
|
4902
|
+
# @param old [Object]
|
4903
|
+
# Value or sequence of values to replace.
|
4904
|
+
# Also accepts a mapping of values to their replacement as syntactic sugar for
|
4905
|
+
# `replace_strict(Polars::Series.new(mapping.keys), Polars::Series.new(mapping.values))`.
|
4906
|
+
# @param new [Object]
|
4907
|
+
# Value or sequence of values to replace by.
|
4908
|
+
# Length must match the length of `old` or have length 1.
|
4909
|
+
# @param default [Object]
|
4910
|
+
# Set values that were not replaced to this value. If no default is specified
|
4911
|
+
# (the default), an error is raised if any non-null values were not replaced.
|
4912
|
+
# Accepts expression input. Non-expression inputs are parsed as literals.
|
4913
|
+
# @param return_dtype [Object]
|
4914
|
+
# The data type of the resulting Series. If set to `nil` (default),
|
4915
|
+
# the data type is determined automatically based on the other inputs.
|
4916
|
+
#
|
4917
|
+
# @return [Series]
|
4918
|
+
#
|
4919
|
+
# @example Replace values by passing sequences to the `old` and `new` parameters.
|
4920
|
+
# s = Polars::Series.new([1, 2, 2, 3])
|
4921
|
+
# s.replace_strict([1, 2, 3], [100, 200, 300])
|
4922
|
+
# # =>
|
4923
|
+
# # shape: (4,)
|
4924
|
+
# # Series: '' [i64]
|
4925
|
+
# # [
|
4926
|
+
# # 100
|
4927
|
+
# # 200
|
4928
|
+
# # 200
|
4929
|
+
# # 300
|
4930
|
+
# # ]
|
4931
|
+
#
|
4932
|
+
# @example Passing a mapping with replacements is also supported as syntactic sugar.
|
4933
|
+
# mapping = {1 => 100, 2 => 200, 3 => 300}
|
4934
|
+
# s.replace_strict(mapping)
|
4935
|
+
# # =>
|
4936
|
+
# # shape: (4,)
|
4937
|
+
# # Series: '' [i64]
|
4938
|
+
# # [
|
4939
|
+
# # 100
|
4940
|
+
# # 200
|
4941
|
+
# # 200
|
4942
|
+
# # 300
|
4943
|
+
# # ]
|
4944
|
+
#
|
4945
|
+
# @example By default, an error is raised if any non-null values were not replaced. Specify a default to set all values that were not matched.
|
4946
|
+
# mapping = {2 => 200, 3 => 300}
|
4947
|
+
# s.replace_strict(mapping, default: -1)
|
4948
|
+
# # =>
|
4949
|
+
# # shape: (4,)
|
4950
|
+
# # Series: '' [i64]
|
4951
|
+
# # [
|
4952
|
+
# # -1
|
4953
|
+
# # 200
|
4954
|
+
# # 200
|
4955
|
+
# # 300
|
4956
|
+
# # ]
|
4957
|
+
#
|
4958
|
+
# @example The default can be another Series.
|
4959
|
+
# default = Polars::Series.new([2.5, 5.0, 7.5, 10.0])
|
4960
|
+
# s.replace_strict(2, 200, default: default)
|
4961
|
+
# # =>
|
4962
|
+
# # shape: (4,)
|
4963
|
+
# # Series: '' [f64]
|
4964
|
+
# # [
|
4965
|
+
# # 2.5
|
4966
|
+
# # 200.0
|
4967
|
+
# # 200.0
|
4968
|
+
# # 10.0
|
4969
|
+
# # ]
|
4970
|
+
#
|
4971
|
+
# @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and the `default` data type.
|
4972
|
+
# s = Polars::Series.new(["x", "y", "z"])
|
4973
|
+
# mapping = {"x" => 1, "y" => 2, "z" => 3}
|
4974
|
+
# s.replace_strict(mapping)
|
4975
|
+
# # =>
|
4976
|
+
# # shape: (3,)
|
4977
|
+
# # Series: '' [i64]
|
4978
|
+
# # [
|
4979
|
+
# # 1
|
4980
|
+
# # 2
|
4981
|
+
# # 3
|
4982
|
+
# # ]
|
4983
|
+
#
|
4984
|
+
# @example
|
4985
|
+
# s.replace_strict(mapping, default: "x")
|
4986
|
+
# # =>
|
4987
|
+
# # shape: (3,)
|
4988
|
+
# # Series: '' [str]
|
4989
|
+
# # [
|
4990
|
+
# # "1"
|
4991
|
+
# # "2"
|
4992
|
+
# # "3"
|
4993
|
+
# # ]
|
4994
|
+
#
|
4995
|
+
# @example Set the `return_dtype` parameter to control the resulting data type directly.
|
4996
|
+
# s.replace_strict(mapping, return_dtype: Polars::UInt8)
|
4997
|
+
# # =>
|
4998
|
+
# # shape: (3,)
|
4999
|
+
# # Series: '' [u8]
|
5000
|
+
# # [
|
5001
|
+
# # 1
|
5002
|
+
# # 2
|
5003
|
+
# # 3
|
5004
|
+
# # ]
|
5005
|
+
def replace_strict(
|
5006
|
+
old,
|
5007
|
+
new = Expr::NO_DEFAULT,
|
5008
|
+
default: Expr::NO_DEFAULT,
|
5009
|
+
return_dtype: nil
|
5010
|
+
)
|
5011
|
+
super
|
5012
|
+
end
|
5013
|
+
|
4204
5014
|
# Reshape this Series to a flat Series or a Series of Lists.
|
4205
5015
|
#
|
4206
5016
|
# @param dims [Array]
|
@@ -4256,8 +5066,8 @@ module Polars
|
|
4256
5066
|
# # Series: 'a' [i64]
|
4257
5067
|
# # [
|
4258
5068
|
# # 2
|
4259
|
-
# # 1
|
4260
5069
|
# # 3
|
5070
|
+
# # 1
|
4261
5071
|
# # ]
|
4262
5072
|
def shuffle(seed: nil)
|
4263
5073
|
super
|
@@ -4290,6 +5100,68 @@ module Polars
|
|
4290
5100
|
super
|
4291
5101
|
end
|
4292
5102
|
|
5103
|
+
# Compute time-based exponentially weighted moving average.
|
5104
|
+
#
|
5105
|
+
# @param by [Object]
|
5106
|
+
# Times to calculate average by. Should be `Datetime`, `Date`, `UInt64`,
|
5107
|
+
# `UInt32`, `Int64`, or `Int32` data type.
|
5108
|
+
# @param half_life [String]
|
5109
|
+
# Unit over which observation decays to half its value.
|
5110
|
+
#
|
5111
|
+
# Can be created either from a timedelta, or
|
5112
|
+
# by using the following string language:
|
5113
|
+
#
|
5114
|
+
# - 1ns (1 nanosecond)
|
5115
|
+
# - 1us (1 microsecond)
|
5116
|
+
# - 1ms (1 millisecond)
|
5117
|
+
# - 1s (1 second)
|
5118
|
+
# - 1m (1 minute)
|
5119
|
+
# - 1h (1 hour)
|
5120
|
+
# - 1d (1 day)
|
5121
|
+
# - 1w (1 week)
|
5122
|
+
# - 1i (1 index count)
|
5123
|
+
#
|
5124
|
+
# Or combine them:
|
5125
|
+
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
5126
|
+
#
|
5127
|
+
# Note that `half_life` is treated as a constant duration - calendar
|
5128
|
+
# durations such as months (or even days in the time-zone-aware case)
|
5129
|
+
# are not supported, please express your duration in an approximately
|
5130
|
+
# equivalent number of hours (e.g. '370h' instead of '1mo').
|
5131
|
+
#
|
5132
|
+
# @return [Series]
|
5133
|
+
#
|
5134
|
+
# @example
|
5135
|
+
# df = Polars::DataFrame.new(
|
5136
|
+
# {
|
5137
|
+
# "values" => [0, 1, 2, nil, 4],
|
5138
|
+
# "times" => [
|
5139
|
+
# Date.new(2020, 1, 1),
|
5140
|
+
# Date.new(2020, 1, 3),
|
5141
|
+
# Date.new(2020, 1, 10),
|
5142
|
+
# Date.new(2020, 1, 15),
|
5143
|
+
# Date.new(2020, 1, 17)
|
5144
|
+
# ]
|
5145
|
+
# }
|
5146
|
+
# ).sort("times")
|
5147
|
+
# df["values"].ewm_mean_by(df["times"], half_life: "4d")
|
5148
|
+
# # =>
|
5149
|
+
# # shape: (5,)
|
5150
|
+
# # Series: 'values' [f64]
|
5151
|
+
# # [
|
5152
|
+
# # 0.0
|
5153
|
+
# # 0.292893
|
5154
|
+
# # 1.492474
|
5155
|
+
# # null
|
5156
|
+
# # 3.254508
|
5157
|
+
# # ]
|
5158
|
+
def ewm_mean_by(
|
5159
|
+
by,
|
5160
|
+
half_life:
|
5161
|
+
)
|
5162
|
+
super
|
5163
|
+
end
|
5164
|
+
|
4293
5165
|
# Exponentially-weighted moving standard deviation.
|
4294
5166
|
#
|
4295
5167
|
# @return [Series]
|
@@ -4438,6 +5310,140 @@ module Polars
|
|
4438
5310
|
super
|
4439
5311
|
end
|
4440
5312
|
|
5313
|
+
# Get the chunks of this Series as a list of Series.
|
5314
|
+
#
|
5315
|
+
# @return [Array]
|
5316
|
+
#
|
5317
|
+
# @example
|
5318
|
+
# s1 = Polars::Series.new("a", [1, 2, 3])
|
5319
|
+
# s2 = Polars::Series.new("a", [4, 5, 6])
|
5320
|
+
# s = Polars.concat([s1, s2], rechunk: false)
|
5321
|
+
# s.get_chunks
|
5322
|
+
# # =>
|
5323
|
+
# # [shape: (3,)
|
5324
|
+
# # Series: 'a' [i64]
|
5325
|
+
# # [
|
5326
|
+
# # 1
|
5327
|
+
# # 2
|
5328
|
+
# # 3
|
5329
|
+
# # ], shape: (3,)
|
5330
|
+
# # Series: 'a' [i64]
|
5331
|
+
# # [
|
5332
|
+
# # 4
|
5333
|
+
# # 5
|
5334
|
+
# # 6
|
5335
|
+
# # ]]
|
5336
|
+
def get_chunks
|
5337
|
+
_s.get_chunks
|
5338
|
+
end
|
5339
|
+
|
5340
|
+
# Aggregate values into a list.
|
5341
|
+
#
|
5342
|
+
# @return [Series]
|
5343
|
+
#
|
5344
|
+
# @example
|
5345
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
5346
|
+
# s.implode
|
5347
|
+
# # =>
|
5348
|
+
# # shape: (1,)
|
5349
|
+
# # Series: 'a' [list[i64]]
|
5350
|
+
# # [
|
5351
|
+
# # [1, 2, 3]
|
5352
|
+
# # ]
|
5353
|
+
def implode
|
5354
|
+
super
|
5355
|
+
end
|
5356
|
+
|
5357
|
+
# Evaluate the number of set bits.
|
5358
|
+
#
|
5359
|
+
# @return [Series]
|
5360
|
+
def bitwise_count_ones
|
5361
|
+
super
|
5362
|
+
end
|
5363
|
+
|
5364
|
+
# Evaluate the number of unset bits.
|
5365
|
+
#
|
5366
|
+
# @return [Series]
|
5367
|
+
def bitwise_count_zeros
|
5368
|
+
super
|
5369
|
+
end
|
5370
|
+
|
5371
|
+
# Evaluate the number of most-significant set bits before seeing an unset bit.
|
5372
|
+
#
|
5373
|
+
# @return [Series]
|
5374
|
+
def bitwise_leading_ones
|
5375
|
+
super
|
5376
|
+
end
|
5377
|
+
|
5378
|
+
# Evaluate the number of most-significant unset bits before seeing a set bit.
|
5379
|
+
#
|
5380
|
+
# @return [Series]
|
5381
|
+
def bitwise_leading_zeros
|
5382
|
+
super
|
5383
|
+
end
|
5384
|
+
|
5385
|
+
# Evaluate the number of least-significant set bits before seeing an unset bit.
|
5386
|
+
#
|
5387
|
+
# @return [Series]
|
5388
|
+
def bitwise_trailing_ones
|
5389
|
+
super
|
5390
|
+
end
|
5391
|
+
|
5392
|
+
# Evaluate the number of least-significant unset bits before seeing a set bit.
|
5393
|
+
#
|
5394
|
+
# @return [Series]
|
5395
|
+
def bitwise_trailing_zeros
|
5396
|
+
super
|
5397
|
+
end
|
5398
|
+
|
5399
|
+
# Perform an aggregation of bitwise ANDs.
|
5400
|
+
#
|
5401
|
+
# @return [Object]
|
5402
|
+
def bitwise_and
|
5403
|
+
_s.bitwise_and
|
5404
|
+
end
|
5405
|
+
|
5406
|
+
# Perform an aggregation of bitwise ORs.
|
5407
|
+
#
|
5408
|
+
# @return [Object]
|
5409
|
+
def bitwise_or
|
5410
|
+
_s.bitwise_or
|
5411
|
+
end
|
5412
|
+
|
5413
|
+
# Perform an aggregation of bitwise XORs.
|
5414
|
+
#
|
5415
|
+
# @return [Object]
|
5416
|
+
def bitwise_xor
|
5417
|
+
_s.bitwise_xor
|
5418
|
+
end
|
5419
|
+
|
5420
|
+
# Get the first element of the Series.
|
5421
|
+
#
|
5422
|
+
# Returns `nil` if the Series is empty.
|
5423
|
+
#
|
5424
|
+
# @return [Object]
|
5425
|
+
def first
|
5426
|
+
_s.first
|
5427
|
+
end
|
5428
|
+
|
5429
|
+
# Get the last element of the Series.
|
5430
|
+
#
|
5431
|
+
# Returns `nil` if the Series is empty.
|
5432
|
+
#
|
5433
|
+
# @return [Object]
|
5434
|
+
def last
|
5435
|
+
_s.last
|
5436
|
+
end
|
5437
|
+
|
5438
|
+
# Approximate count of unique values.
|
5439
|
+
#
|
5440
|
+
# This is done using the HyperLogLog++ algorithm for cardinality estimation.
|
5441
|
+
#
|
5442
|
+
# @return [Object]
|
5443
|
+
def approx_n_unique
|
5444
|
+
_s.approx_n_unique
|
5445
|
+
end
|
5446
|
+
|
4441
5447
|
# Create an object namespace of all list related methods.
|
4442
5448
|
#
|
4443
5449
|
# @return [ListNameSpace]
|
@@ -4487,6 +5493,20 @@ module Polars
|
|
4487
5493
|
StructNameSpace.new(self)
|
4488
5494
|
end
|
4489
5495
|
|
5496
|
+
# Repeat the elements in this Series as specified in the given expression.
|
5497
|
+
#
|
5498
|
+
# The repeated elements are expanded into a List.
|
5499
|
+
#
|
5500
|
+
# @param by [Object]
|
5501
|
+
# Numeric column that determines how often the values will be repeated.
|
5502
|
+
# The column will be coerced to UInt32. Give this dtype to make the coercion
|
5503
|
+
# a no-op.
|
5504
|
+
#
|
5505
|
+
# @return [Object]
|
5506
|
+
def repeat_by(by)
|
5507
|
+
super
|
5508
|
+
end
|
5509
|
+
|
4490
5510
|
private
|
4491
5511
|
|
4492
5512
|
def initialize_copy(other)
|