polars-df 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +73 -3
- data/Cargo.toml +3 -0
- data/ext/polars/Cargo.toml +12 -1
- data/ext/polars/src/conversion.rs +80 -0
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +2 -2
- data/ext/polars/src/lazy/dsl.rs +98 -0
- data/ext/polars/src/lib.rs +34 -0
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +35 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +101 -4
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/expr.rb +3774 -58
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/group_by.rb +1 -0
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +8 -4
- data/lib/polars/lazy_functions.rb +126 -16
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/series.rb +802 -52
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +28 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -0
- metadata +8 -2
data/lib/polars/series.rb
CHANGED
@@ -3,9 +3,6 @@ module Polars
|
|
3
3
|
class Series
|
4
4
|
include ExprDispatch
|
5
5
|
|
6
|
-
# @private
|
7
|
-
attr_accessor :_s
|
8
|
-
|
9
6
|
# Create a new Series.
|
10
7
|
#
|
11
8
|
# @param name [String, Array, nil]
|
@@ -730,8 +727,43 @@ module Polars
|
|
730
727
|
Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
|
731
728
|
end
|
732
729
|
|
733
|
-
#
|
734
|
-
#
|
730
|
+
# Run an expression over a sliding window that grows by `1` slot every iteration.
|
731
|
+
#
|
732
|
+
# @param expr [Expr]
|
733
|
+
# Expression to evaluate
|
734
|
+
# @param min_periods [Integer]
|
735
|
+
# Number of valid values there should be in the window before the expression
|
736
|
+
# is evaluated. valid values = `length - null_count`
|
737
|
+
# @param parallel [Boolean]
|
738
|
+
# Run in parallel. Don't do this in a groupby or another operation that
|
739
|
+
# already has much parallelization.
|
740
|
+
#
|
741
|
+
# @return [Series]
|
742
|
+
#
|
743
|
+
# @note
|
744
|
+
# This functionality is experimental and may change without it being considered a
|
745
|
+
# breaking change.
|
746
|
+
#
|
747
|
+
# @note
|
748
|
+
# This can be really slow as it can have `O(n^2)` complexity. Don't use this
|
749
|
+
# for operations that visit all elements.
|
750
|
+
#
|
751
|
+
# @example
|
752
|
+
# s = Polars::Series.new("values", [1, 2, 3, 4, 5])
|
753
|
+
# s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
|
754
|
+
# # =>
|
755
|
+
# # shape: (5,)
|
756
|
+
# # Series: 'values' [f64]
|
757
|
+
# # [
|
758
|
+
# # 0.0
|
759
|
+
# # -3.0
|
760
|
+
# # -8.0
|
761
|
+
# # -15.0
|
762
|
+
# # -24.0
|
763
|
+
# # ]
|
764
|
+
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
765
|
+
super
|
766
|
+
end
|
735
767
|
|
736
768
|
# Return a copy of the Series with a new alias/name.
|
737
769
|
#
|
@@ -1436,8 +1468,57 @@ module Polars
|
|
1436
1468
|
super
|
1437
1469
|
end
|
1438
1470
|
|
1439
|
-
#
|
1440
|
-
#
|
1471
|
+
# Check if elements of this Series are in the other Series.
|
1472
|
+
#
|
1473
|
+
# @return [Series]
|
1474
|
+
#
|
1475
|
+
# @example
|
1476
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1477
|
+
# s2 = Polars::Series.new("b", [2, 4])
|
1478
|
+
# s2.is_in(s)
|
1479
|
+
# # =>
|
1480
|
+
# # shape: (2,)
|
1481
|
+
# # Series: 'b' [bool]
|
1482
|
+
# # [
|
1483
|
+
# # true
|
1484
|
+
# # false
|
1485
|
+
# # ]
|
1486
|
+
#
|
1487
|
+
# @example
|
1488
|
+
# sets = Polars::Series.new("sets", [[1, 2, 3], [1, 2], [9, 10]])
|
1489
|
+
# # =>
|
1490
|
+
# # shape: (3,)
|
1491
|
+
# # Series: 'sets' [list]
|
1492
|
+
# # [
|
1493
|
+
# # [1, 2, 3]
|
1494
|
+
# # [1, 2]
|
1495
|
+
# # [9, 10]
|
1496
|
+
# # ]
|
1497
|
+
#
|
1498
|
+
# @example
|
1499
|
+
# optional_members = Polars::Series.new("optional_members", [1, 2, 3])
|
1500
|
+
# # =>
|
1501
|
+
# # shape: (3,)
|
1502
|
+
# # Series: 'optional_members' [i64]
|
1503
|
+
# # [
|
1504
|
+
# # 1
|
1505
|
+
# # 2
|
1506
|
+
# # 3
|
1507
|
+
# # ]
|
1508
|
+
#
|
1509
|
+
# @example
|
1510
|
+
# optional_members.is_in(sets)
|
1511
|
+
# # =>
|
1512
|
+
# # shape: (3,)
|
1513
|
+
# # Series: 'optional_members' [bool]
|
1514
|
+
# # [
|
1515
|
+
# # true
|
1516
|
+
# # true
|
1517
|
+
# # false
|
1518
|
+
# # ]
|
1519
|
+
def is_in(other)
|
1520
|
+
super
|
1521
|
+
end
|
1441
1522
|
|
1442
1523
|
# Get index values where Boolean Series evaluate `true`.
|
1443
1524
|
#
|
@@ -1909,8 +1990,28 @@ module Polars
|
|
1909
1990
|
super
|
1910
1991
|
end
|
1911
1992
|
|
1912
|
-
#
|
1913
|
-
#
|
1993
|
+
# Compute the dot/inner product between two Series.
|
1994
|
+
#
|
1995
|
+
# @param other [Object]
|
1996
|
+
# Series (or array) to compute dot product with.
|
1997
|
+
#
|
1998
|
+
# @return [Numeric]
|
1999
|
+
#
|
2000
|
+
# @example
|
2001
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2002
|
+
# s2 = Polars::Series.new("b", [4.0, 5.0, 6.0])
|
2003
|
+
# s.dot(s2)
|
2004
|
+
# # => 32.0
|
2005
|
+
def dot(other)
|
2006
|
+
if !other.is_a?(Series)
|
2007
|
+
other = Series.new(other)
|
2008
|
+
end
|
2009
|
+
if len != other.len
|
2010
|
+
n, m = len, other.len
|
2011
|
+
raise ArgumentError, "Series length mismatch: expected #{n}, found #{m}"
|
2012
|
+
end
|
2013
|
+
_s.dot(other._s)
|
2014
|
+
end
|
1914
2015
|
|
1915
2016
|
# Compute the most occurring value(s).
|
1916
2017
|
#
|
@@ -2277,38 +2378,509 @@ module Polars
|
|
2277
2378
|
Utils.wrap_s(_s.zip_with(mask._s, other._s))
|
2278
2379
|
end
|
2279
2380
|
|
2280
|
-
#
|
2281
|
-
#
|
2381
|
+
# Apply a rolling min (moving min) over the values in this array.
|
2382
|
+
#
|
2383
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2384
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2385
|
+
# `weights` array. The resulting values will be aggregated to their minimum.
|
2386
|
+
#
|
2387
|
+
# @param window_size [Integer]
|
2388
|
+
# The length of the window.
|
2389
|
+
# @param weights [Array]
|
2390
|
+
# An optional slice with the same length as the window that will be multiplied
|
2391
|
+
# elementwise with the values in the window.
|
2392
|
+
# @param min_periods [Integer]
|
2393
|
+
# The number of values in the window that should be non-null before computing
|
2394
|
+
# a result. If nil, it will be set equal to window size.
|
2395
|
+
# @param center [Boolean]
|
2396
|
+
# Set the labels at the center of the window
|
2397
|
+
#
|
2398
|
+
# @return [Series]
|
2399
|
+
#
|
2400
|
+
# @example
|
2401
|
+
# s = Polars::Series.new("a", [100, 200, 300, 400, 500])
|
2402
|
+
# s.rolling_min(3)
|
2403
|
+
# # =>
|
2404
|
+
# # shape: (5,)
|
2405
|
+
# # Series: 'a' [i64]
|
2406
|
+
# # [
|
2407
|
+
# # null
|
2408
|
+
# # null
|
2409
|
+
# # 100
|
2410
|
+
# # 200
|
2411
|
+
# # 300
|
2412
|
+
# # ]
|
2413
|
+
def rolling_min(
|
2414
|
+
window_size,
|
2415
|
+
weights: nil,
|
2416
|
+
min_periods: nil,
|
2417
|
+
center: false
|
2418
|
+
)
|
2419
|
+
to_frame
|
2420
|
+
.select(
|
2421
|
+
Polars.col(name).rolling_min(
|
2422
|
+
window_size,
|
2423
|
+
weights: weights,
|
2424
|
+
min_periods: min_periods,
|
2425
|
+
center: center
|
2426
|
+
)
|
2427
|
+
)
|
2428
|
+
.to_series
|
2429
|
+
end
|
2282
2430
|
|
2283
|
-
#
|
2284
|
-
#
|
2431
|
+
# Apply a rolling max (moving max) over the values in this array.
|
2432
|
+
#
|
2433
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2434
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2435
|
+
# `weights` array. The resulting values will be aggregated to their maximum.
|
2436
|
+
#
|
2437
|
+
# @param window_size [Integer]
|
2438
|
+
# The length of the window.
|
2439
|
+
# @param weights [Array]
|
2440
|
+
# An optional slice with the same length as the window that will be multiplied
|
2441
|
+
# elementwise with the values in the window.
|
2442
|
+
# @param min_periods [Integer]
|
2443
|
+
# The number of values in the window that should be non-null before computing
|
2444
|
+
# a result. If nil, it will be set equal to window size.
|
2445
|
+
# @param center [Boolean]
|
2446
|
+
# Set the labels at the center of the window
|
2447
|
+
#
|
2448
|
+
# @return [Series]
|
2449
|
+
#
|
2450
|
+
# @example
|
2451
|
+
# s = Polars::Series.new("a", [100, 200, 300, 400, 500])
|
2452
|
+
# s.rolling_max(2)
|
2453
|
+
# # =>
|
2454
|
+
# # shape: (5,)
|
2455
|
+
# # Series: 'a' [i64]
|
2456
|
+
# # [
|
2457
|
+
# # null
|
2458
|
+
# # 200
|
2459
|
+
# # 300
|
2460
|
+
# # 400
|
2461
|
+
# # 500
|
2462
|
+
# # ]
|
2463
|
+
def rolling_max(
|
2464
|
+
window_size,
|
2465
|
+
weights: nil,
|
2466
|
+
min_periods: nil,
|
2467
|
+
center: false
|
2468
|
+
)
|
2469
|
+
to_frame
|
2470
|
+
.select(
|
2471
|
+
Polars.col(name).rolling_max(
|
2472
|
+
window_size,
|
2473
|
+
weights: weights,
|
2474
|
+
min_periods: min_periods,
|
2475
|
+
center: center
|
2476
|
+
)
|
2477
|
+
)
|
2478
|
+
.to_series
|
2479
|
+
end
|
2285
2480
|
|
2286
|
-
#
|
2287
|
-
#
|
2481
|
+
# Apply a rolling mean (moving mean) over the values in this array.
|
2482
|
+
#
|
2483
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2484
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2485
|
+
# `weights` array. The resulting values will be aggregated to their mean.
|
2486
|
+
#
|
2487
|
+
# @param window_size [Integer]
|
2488
|
+
# The length of the window.
|
2489
|
+
# @param weights [Array]
|
2490
|
+
# An optional slice with the same length as the window that will be multiplied
|
2491
|
+
# elementwise with the values in the window.
|
2492
|
+
# @param min_periods [Integer]
|
2493
|
+
# The number of values in the window that should be non-null before computing
|
2494
|
+
# a result. If nil, it will be set equal to window size.
|
2495
|
+
# @param center [Boolean]
|
2496
|
+
# Set the labels at the center of the window
|
2497
|
+
#
|
2498
|
+
# @return [Series]
|
2499
|
+
#
|
2500
|
+
# @example
|
2501
|
+
# s = Polars::Series.new("a", [100, 200, 300, 400, 500])
|
2502
|
+
# s.rolling_mean(2)
|
2503
|
+
# # =>
|
2504
|
+
# # shape: (5,)
|
2505
|
+
# # Series: 'a' [f64]
|
2506
|
+
# # [
|
2507
|
+
# # null
|
2508
|
+
# # 150.0
|
2509
|
+
# # 250.0
|
2510
|
+
# # 350.0
|
2511
|
+
# # 450.0
|
2512
|
+
# # ]
|
2513
|
+
def rolling_mean(
|
2514
|
+
window_size,
|
2515
|
+
weights: nil,
|
2516
|
+
min_periods: nil,
|
2517
|
+
center: false
|
2518
|
+
)
|
2519
|
+
to_frame
|
2520
|
+
.select(
|
2521
|
+
Polars.col(name).rolling_mean(
|
2522
|
+
window_size,
|
2523
|
+
weights: weights,
|
2524
|
+
min_periods: min_periods,
|
2525
|
+
center: center
|
2526
|
+
)
|
2527
|
+
)
|
2528
|
+
.to_series
|
2529
|
+
end
|
2288
2530
|
|
2289
|
-
#
|
2290
|
-
#
|
2531
|
+
# Apply a rolling sum (moving sum) over the values in this array.
|
2532
|
+
#
|
2533
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2534
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2535
|
+
# `weights` array. The resulting values will be aggregated to their sum.
|
2536
|
+
#
|
2537
|
+
# @param window_size [Integer]
|
2538
|
+
# The length of the window.
|
2539
|
+
# @param weights [Array]
|
2540
|
+
# An optional slice with the same length as the window that will be multiplied
|
2541
|
+
# elementwise with the values in the window.
|
2542
|
+
# @param min_periods [Integer]
|
2543
|
+
# The number of values in the window that should be non-null before computing
|
2544
|
+
# a result. If nil, it will be set equal to window size.
|
2545
|
+
# @param center [Boolean]
|
2546
|
+
# Set the labels at the center of the window
|
2547
|
+
#
|
2548
|
+
# @return [Series]
|
2549
|
+
#
|
2550
|
+
# @example
|
2551
|
+
# s = Polars::Series.new("a", [1, 2, 3, 4, 5])
|
2552
|
+
# s.rolling_sum(2)
|
2553
|
+
# # =>
|
2554
|
+
# # shape: (5,)
|
2555
|
+
# # Series: 'a' [i64]
|
2556
|
+
# # [
|
2557
|
+
# # null
|
2558
|
+
# # 3
|
2559
|
+
# # 5
|
2560
|
+
# # 7
|
2561
|
+
# # 9
|
2562
|
+
# # ]
|
2563
|
+
def rolling_sum(
|
2564
|
+
window_size,
|
2565
|
+
weights: nil,
|
2566
|
+
min_periods: nil,
|
2567
|
+
center: false
|
2568
|
+
)
|
2569
|
+
to_frame
|
2570
|
+
.select(
|
2571
|
+
Polars.col(name).rolling_sum(
|
2572
|
+
window_size,
|
2573
|
+
weights: weights,
|
2574
|
+
min_periods: min_periods,
|
2575
|
+
center: center
|
2576
|
+
)
|
2577
|
+
)
|
2578
|
+
.to_series
|
2579
|
+
end
|
2291
2580
|
|
2292
|
-
#
|
2293
|
-
#
|
2581
|
+
# Compute a rolling std dev.
|
2582
|
+
#
|
2583
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2584
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2585
|
+
# `weights` array. The resulting values will be aggregated to their standard deviation.
|
2586
|
+
#
|
2587
|
+
# @param window_size [Integer]
|
2588
|
+
# The length of the window.
|
2589
|
+
# @param weights [Array]
|
2590
|
+
# An optional slice with the same length as the window that will be multiplied
|
2591
|
+
# elementwise with the values in the window.
|
2592
|
+
# @param min_periods [Integer]
|
2593
|
+
# The number of values in the window that should be non-null before computing
|
2594
|
+
# a result. If nil, it will be set equal to window size.
|
2595
|
+
# @param center [Boolean]
|
2596
|
+
# Set the labels at the center of the window
|
2597
|
+
#
|
2598
|
+
# @return [Series]
|
2599
|
+
#
|
2600
|
+
# @example
|
2601
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
|
2602
|
+
# s.rolling_std(3)
|
2603
|
+
# # =>
|
2604
|
+
# # shape: (6,)
|
2605
|
+
# # Series: 'a' [f64]
|
2606
|
+
# # [
|
2607
|
+
# # null
|
2608
|
+
# # null
|
2609
|
+
# # 1.0
|
2610
|
+
# # 1.0
|
2611
|
+
# # 1.527525
|
2612
|
+
# # 2.0
|
2613
|
+
# # ]
|
2614
|
+
def rolling_std(
|
2615
|
+
window_size,
|
2616
|
+
weights: nil,
|
2617
|
+
min_periods: nil,
|
2618
|
+
center: false
|
2619
|
+
)
|
2620
|
+
to_frame
|
2621
|
+
.select(
|
2622
|
+
Polars.col(name).rolling_std(
|
2623
|
+
window_size,
|
2624
|
+
weights: weights,
|
2625
|
+
min_periods: min_periods,
|
2626
|
+
center: center
|
2627
|
+
)
|
2628
|
+
)
|
2629
|
+
.to_series
|
2630
|
+
end
|
2294
2631
|
|
2295
|
-
#
|
2296
|
-
#
|
2632
|
+
# Compute a rolling variance.
|
2633
|
+
#
|
2634
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2635
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2636
|
+
# `weights` array. The resulting values will be aggregated to their variance.
|
2637
|
+
#
|
2638
|
+
# @param window_size [Integer]
|
2639
|
+
# The length of the window.
|
2640
|
+
# @param weights [Array]
|
2641
|
+
# An optional slice with the same length as the window that will be multiplied
|
2642
|
+
# elementwise with the values in the window.
|
2643
|
+
# @param min_periods [Integer]
|
2644
|
+
# The number of values in the window that should be non-null before computing
|
2645
|
+
# a result. If nil, it will be set equal to window size.
|
2646
|
+
# @param center [Boolean]
|
2647
|
+
# Set the labels at the center of the window
|
2648
|
+
#
|
2649
|
+
# @return [Series]
|
2650
|
+
#
|
2651
|
+
# @example
|
2652
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
|
2653
|
+
# s.rolling_var(3)
|
2654
|
+
# # =>
|
2655
|
+
# # shape: (6,)
|
2656
|
+
# # Series: 'a' [f64]
|
2657
|
+
# # [
|
2658
|
+
# # null
|
2659
|
+
# # null
|
2660
|
+
# # 1.0
|
2661
|
+
# # 1.0
|
2662
|
+
# # 2.333333
|
2663
|
+
# # 4.0
|
2664
|
+
# # ]
|
2665
|
+
def rolling_var(
|
2666
|
+
window_size,
|
2667
|
+
weights: nil,
|
2668
|
+
min_periods: nil,
|
2669
|
+
center: false
|
2670
|
+
)
|
2671
|
+
to_frame
|
2672
|
+
.select(
|
2673
|
+
Polars.col(name).rolling_var(
|
2674
|
+
window_size,
|
2675
|
+
weights: weights,
|
2676
|
+
min_periods: min_periods,
|
2677
|
+
center: center
|
2678
|
+
)
|
2679
|
+
)
|
2680
|
+
.to_series
|
2681
|
+
end
|
2297
2682
|
|
2298
2683
|
# def rolling_apply
|
2299
2684
|
# end
|
2300
2685
|
|
2301
|
-
#
|
2302
|
-
#
|
2686
|
+
# Compute a rolling median.
|
2687
|
+
#
|
2688
|
+
# @param window_size [Integer]
|
2689
|
+
# The length of the window.
|
2690
|
+
# @param weights [Array]
|
2691
|
+
# An optional slice with the same length as the window that will be multiplied
|
2692
|
+
# elementwise with the values in the window.
|
2693
|
+
# @param min_periods [Integer]
|
2694
|
+
# The number of values in the window that should be non-null before computing
|
2695
|
+
# a result. If nil, it will be set equal to window size.
|
2696
|
+
# @param center [Boolean]
|
2697
|
+
# Set the labels at the center of the window
|
2698
|
+
#
|
2699
|
+
# @return [Series]
|
2700
|
+
#
|
2701
|
+
# @example
|
2702
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
|
2703
|
+
# s.rolling_median(3)
|
2704
|
+
# # =>
|
2705
|
+
# # shape: (6,)
|
2706
|
+
# # Series: 'a' [f64]
|
2707
|
+
# # [
|
2708
|
+
# # null
|
2709
|
+
# # null
|
2710
|
+
# # 2.0
|
2711
|
+
# # 3.0
|
2712
|
+
# # 4.0
|
2713
|
+
# # 6.0
|
2714
|
+
# # ]
|
2715
|
+
def rolling_median(
|
2716
|
+
window_size,
|
2717
|
+
weights: nil,
|
2718
|
+
min_periods: nil,
|
2719
|
+
center: false
|
2720
|
+
)
|
2721
|
+
if min_periods.nil?
|
2722
|
+
min_periods = window_size
|
2723
|
+
end
|
2303
2724
|
|
2304
|
-
|
2305
|
-
|
2725
|
+
to_frame
|
2726
|
+
.select(
|
2727
|
+
Polars.col(name).rolling_median(
|
2728
|
+
window_size,
|
2729
|
+
weights: weights,
|
2730
|
+
min_periods: min_periods,
|
2731
|
+
center: center
|
2732
|
+
)
|
2733
|
+
)
|
2734
|
+
.to_series
|
2735
|
+
end
|
2306
2736
|
|
2307
|
-
#
|
2308
|
-
#
|
2737
|
+
# Compute a rolling quantile.
|
2738
|
+
#
|
2739
|
+
# @param quantile [Float]
|
2740
|
+
# Quantile between 0.0 and 1.0.
|
2741
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
2742
|
+
# Interpolation method.
|
2743
|
+
# @param window_size [Integer]
|
2744
|
+
# The length of the window.
|
2745
|
+
# @param weights [Array]
|
2746
|
+
# An optional slice with the same length as the window that will be multiplied
|
2747
|
+
# elementwise with the values in the window.
|
2748
|
+
# @param min_periods [Integer]
|
2749
|
+
# The number of values in the window that should be non-null before computing
|
2750
|
+
# a result. If nil, it will be set equal to window size.
|
2751
|
+
# @param center [Boolean]
|
2752
|
+
# Set the labels at the center of the window
|
2753
|
+
#
|
2754
|
+
# @return [Series]
|
2755
|
+
#
|
2756
|
+
# @example
|
2757
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
|
2758
|
+
# s.rolling_quantile(0.33, window_size: 3)
|
2759
|
+
# # =>
|
2760
|
+
# # shape: (6,)
|
2761
|
+
# # Series: 'a' [f64]
|
2762
|
+
# # [
|
2763
|
+
# # null
|
2764
|
+
# # null
|
2765
|
+
# # 1.0
|
2766
|
+
# # 2.0
|
2767
|
+
# # 3.0
|
2768
|
+
# # 4.0
|
2769
|
+
# # ]
|
2770
|
+
#
|
2771
|
+
# @example
|
2772
|
+
# s.rolling_quantile(0.33, interpolation: "linear", window_size: 3)
|
2773
|
+
# # =>
|
2774
|
+
# # shape: (6,)
|
2775
|
+
# # Series: 'a' [f64]
|
2776
|
+
# # [
|
2777
|
+
# # null
|
2778
|
+
# # null
|
2779
|
+
# # 1.66
|
2780
|
+
# # 2.66
|
2781
|
+
# # 3.66
|
2782
|
+
# # 5.32
|
2783
|
+
# # ]
|
2784
|
+
def rolling_quantile(
|
2785
|
+
quantile,
|
2786
|
+
interpolation: "nearest",
|
2787
|
+
window_size: 2,
|
2788
|
+
weights: nil,
|
2789
|
+
min_periods: nil,
|
2790
|
+
center: false
|
2791
|
+
)
|
2792
|
+
if min_periods.nil?
|
2793
|
+
min_periods = window_size
|
2794
|
+
end
|
2309
2795
|
|
2310
|
-
|
2311
|
-
|
2796
|
+
to_frame
|
2797
|
+
.select(
|
2798
|
+
Polars.col(name).rolling_quantile(
|
2799
|
+
quantile,
|
2800
|
+
interpolation: interpolation,
|
2801
|
+
window_size: window_size,
|
2802
|
+
weights: weights,
|
2803
|
+
min_periods: min_periods,
|
2804
|
+
center: center
|
2805
|
+
)
|
2806
|
+
)
|
2807
|
+
.to_series
|
2808
|
+
end
|
2809
|
+
|
2810
|
+
# Compute a rolling skew.
|
2811
|
+
#
|
2812
|
+
# @param window_size [Integer]
|
2813
|
+
# Integer size of the rolling window.
|
2814
|
+
# @param bias [Boolean]
|
2815
|
+
# If false, the calculations are corrected for statistical bias.
|
2816
|
+
#
|
2817
|
+
# @return [Series]
|
2818
|
+
#
|
2819
|
+
# @example
|
2820
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
|
2821
|
+
# s.rolling_skew(3)
|
2822
|
+
# # =>
|
2823
|
+
# # shape: (6,)
|
2824
|
+
# # Series: 'a' [f64]
|
2825
|
+
# # [
|
2826
|
+
# # null
|
2827
|
+
# # null
|
2828
|
+
# # 0.0
|
2829
|
+
# # 0.0
|
2830
|
+
# # 0.381802
|
2831
|
+
# # 0.0
|
2832
|
+
# # ]
|
2833
|
+
def rolling_skew(window_size, bias: true)
|
2834
|
+
super
|
2835
|
+
end
|
2836
|
+
|
2837
|
+
# Sample from this Series.
|
2838
|
+
#
|
2839
|
+
# @param n [Integer]
|
2840
|
+
# Number of items to return. Cannot be used with `frac`. Defaults to 1 if
|
2841
|
+
# `frac` is nil.
|
2842
|
+
# @param frac [Float]
|
2843
|
+
# Fraction of items to return. Cannot be used with `n`.
|
2844
|
+
# @param with_replacement [Boolean]
|
2845
|
+
# Allow values to be sampled more than once.
|
2846
|
+
# @param shuffle [Boolean]
|
2847
|
+
# Shuffle the order of sampled data points.
|
2848
|
+
# @param seed [Integer]
|
2849
|
+
# Seed for the random number generator. If set to nil (default), a random
|
2850
|
+
# seed is used.
|
2851
|
+
#
|
2852
|
+
# @return [Series]
|
2853
|
+
#
|
2854
|
+
# @example
|
2855
|
+
# s = Polars::Series.new("a", [1, 2, 3, 4, 5])
|
2856
|
+
# s.sample(n: 2, seed: 0)
|
2857
|
+
# # =>
|
2858
|
+
# # shape: (2,)
|
2859
|
+
# # Series: 'a' [i64]
|
2860
|
+
# # [
|
2861
|
+
# # 1
|
2862
|
+
# # 5
|
2863
|
+
# # ]
|
2864
|
+
def sample(
|
2865
|
+
n: nil,
|
2866
|
+
frac: nil,
|
2867
|
+
with_replacement: false,
|
2868
|
+
shuffle: false,
|
2869
|
+
seed: nil
|
2870
|
+
)
|
2871
|
+
if !n.nil? && !frac.nil?
|
2872
|
+
raise ArgumentError, "cannot specify both `n` and `frac`"
|
2873
|
+
end
|
2874
|
+
|
2875
|
+
if n.nil? && !frac.nil?
|
2876
|
+
return Utils.wrap_s(_s.sample_frac(frac, with_replacement, shuffle, seed))
|
2877
|
+
end
|
2878
|
+
|
2879
|
+
if n.nil?
|
2880
|
+
n = 1
|
2881
|
+
end
|
2882
|
+
Utils.wrap_s(_s.sample_n(n, with_replacement, shuffle, seed))
|
2883
|
+
end
|
2312
2884
|
|
2313
2885
|
# Get a boolean mask of the local maximum peaks.
|
2314
2886
|
#
|
@@ -2425,8 +2997,61 @@ module Polars
|
|
2425
2997
|
super
|
2426
2998
|
end
|
2427
2999
|
|
2428
|
-
#
|
2429
|
-
#
|
3000
|
+
# Assign ranks to data, dealing with ties appropriately.
|
3001
|
+
#
|
3002
|
+
# @param method ["average", "min", "max", "dense", "ordinal", "random"]
|
3003
|
+
# The method used to assign ranks to tied elements.
|
3004
|
+
# The following methods are available (default is 'average'):
|
3005
|
+
#
|
3006
|
+
# - 'average' : The average of the ranks that would have been assigned to
|
3007
|
+
# all the tied values is assigned to each value.
|
3008
|
+
# - 'min' : The minimum of the ranks that would have been assigned to all
|
3009
|
+
# the tied values is assigned to each value. (This is also referred to
|
3010
|
+
# as "competition" ranking.)
|
3011
|
+
# - 'max' : The maximum of the ranks that would have been assigned to all
|
3012
|
+
# the tied values is assigned to each value.
|
3013
|
+
# - 'dense' : Like 'min', but the rank of the next highest element is
|
3014
|
+
# assigned the rank immediately after those assigned to the tied
|
3015
|
+
# elements.
|
3016
|
+
# - 'ordinal' : All values are given a distinct rank, corresponding to
|
3017
|
+
# the order that the values occur in the Series.
|
3018
|
+
# - 'random' : Like 'ordinal', but the rank for ties is not dependent
|
3019
|
+
# on the order that the values occur in the Series.
|
3020
|
+
# @param reverse [Boolean]
|
3021
|
+
# Reverse the operation.
|
3022
|
+
#
|
3023
|
+
# @return [Series]
|
3024
|
+
#
|
3025
|
+
# @example The 'average' method:
|
3026
|
+
# s = Polars::Series.new("a", [3, 6, 1, 1, 6])
|
3027
|
+
# s.rank
|
3028
|
+
# # =>
|
3029
|
+
# # shape: (5,)
|
3030
|
+
# # Series: 'a' [f32]
|
3031
|
+
# # [
|
3032
|
+
# # 3.0
|
3033
|
+
# # 4.5
|
3034
|
+
# # 1.5
|
3035
|
+
# # 1.5
|
3036
|
+
# # 4.5
|
3037
|
+
# # ]
|
3038
|
+
#
|
3039
|
+
# @example The 'ordinal' method:
|
3040
|
+
# s = Polars::Series.new("a", [3, 6, 1, 1, 6])
|
3041
|
+
# s.rank(method: "ordinal")
|
3042
|
+
# # =>
|
3043
|
+
# # shape: (5,)
|
3044
|
+
# # Series: 'a' [u32]
|
3045
|
+
# # [
|
3046
|
+
# # 3
|
3047
|
+
# # 4
|
3048
|
+
# # 1
|
3049
|
+
# # 2
|
3050
|
+
# # 5
|
3051
|
+
# # ]
|
3052
|
+
def rank(method: "average", reverse: false)
|
3053
|
+
super
|
3054
|
+
end
|
2430
3055
|
|
2431
3056
|
# Calculate the n-th discrete difference.
|
2432
3057
|
#
|
@@ -2440,8 +3065,56 @@ module Polars
|
|
2440
3065
|
super
|
2441
3066
|
end
|
2442
3067
|
|
2443
|
-
#
|
2444
|
-
#
|
3068
|
+
# Computes percentage change between values.
|
3069
|
+
#
|
3070
|
+
# Percentage change (as fraction) between current element and most-recent
|
3071
|
+
# non-null element at least `n` period(s) before the current element.
|
3072
|
+
#
|
3073
|
+
# Computes the change from the previous row by default.
|
3074
|
+
#
|
3075
|
+
# @param n [Integer]
|
3076
|
+
# Number of periods to shift when computing the percent change.
|
3077
|
+
#
|
3078
|
+
# @return [Series]
|
3079
|
+
#
|
3080
|
+
# @example
|
3081
|
+
# Polars::Series.new(0..9).pct_change
|
3082
|
+
# # =>
|
3083
|
+
# # shape: (10,)
|
3084
|
+
# # Series: '' [f64]
|
3085
|
+
# # [
|
3086
|
+
# # null
|
3087
|
+
# # inf
|
3088
|
+
# # 1.0
|
3089
|
+
# # 0.5
|
3090
|
+
# # 0.333333
|
3091
|
+
# # 0.25
|
3092
|
+
# # 0.2
|
3093
|
+
# # 0.166667
|
3094
|
+
# # 0.142857
|
3095
|
+
# # 0.125
|
3096
|
+
# # ]
|
3097
|
+
#
|
3098
|
+
# @example
|
3099
|
+
# Polars::Series.new([1, 2, 4, 8, 16, 32, 64, 128, 256, 512]).pct_change(n: 2)
|
3100
|
+
# # =>
|
3101
|
+
# # shape: (10,)
|
3102
|
+
# # Series: '' [f64]
|
3103
|
+
# # [
|
3104
|
+
# # null
|
3105
|
+
# # null
|
3106
|
+
# # 3.0
|
3107
|
+
# # 3.0
|
3108
|
+
# # 3.0
|
3109
|
+
# # 3.0
|
3110
|
+
# # 3.0
|
3111
|
+
# # 3.0
|
3112
|
+
# # 3.0
|
3113
|
+
# # 3.0
|
3114
|
+
# # ]
|
3115
|
+
def pct_change(n: 1)
|
3116
|
+
super
|
3117
|
+
end
|
2445
3118
|
|
2446
3119
|
# Compute the sample skewness of a data set.
|
2447
3120
|
#
|
@@ -2571,14 +3244,49 @@ module Polars
|
|
2571
3244
|
super
|
2572
3245
|
end
|
2573
3246
|
|
2574
|
-
#
|
2575
|
-
#
|
3247
|
+
# Exponentially-weighted moving average.
|
3248
|
+
#
|
3249
|
+
# @return [Series]
|
3250
|
+
def ewm_mean(
|
3251
|
+
com: nil,
|
3252
|
+
span: nil,
|
3253
|
+
half_life: nil,
|
3254
|
+
alpha: nil,
|
3255
|
+
adjust: true,
|
3256
|
+
min_periods: 1
|
3257
|
+
)
|
3258
|
+
super
|
3259
|
+
end
|
2576
3260
|
|
2577
|
-
#
|
2578
|
-
#
|
3261
|
+
# Exponentially-weighted moving standard deviation.
|
3262
|
+
#
|
3263
|
+
# @return [Series]
|
3264
|
+
def ewm_std(
|
3265
|
+
com: nil,
|
3266
|
+
span: nil,
|
3267
|
+
half_life: nil,
|
3268
|
+
alpha: nil,
|
3269
|
+
adjust: true,
|
3270
|
+
bias: false,
|
3271
|
+
min_periods: 1
|
3272
|
+
)
|
3273
|
+
super
|
3274
|
+
end
|
2579
3275
|
|
2580
|
-
#
|
2581
|
-
#
|
3276
|
+
# Exponentially-weighted moving variance.
|
3277
|
+
#
|
3278
|
+
# @return [Series]
|
3279
|
+
def ewm_var(
|
3280
|
+
com: nil,
|
3281
|
+
span: nil,
|
3282
|
+
half_life: nil,
|
3283
|
+
alpha: nil,
|
3284
|
+
adjust: true,
|
3285
|
+
bias: false,
|
3286
|
+
min_periods: 1
|
3287
|
+
)
|
3288
|
+
super
|
3289
|
+
end
|
2582
3290
|
|
2583
3291
|
# Extend the Series with given number of values.
|
2584
3292
|
#
|
@@ -2645,20 +3353,40 @@ module Polars
|
|
2645
3353
|
super
|
2646
3354
|
end
|
2647
3355
|
|
2648
|
-
#
|
2649
|
-
#
|
3356
|
+
# Create an object namespace of all list related methods.
|
3357
|
+
#
|
3358
|
+
# @return [ListNameSpace]
|
3359
|
+
def arr
|
3360
|
+
ListNameSpace.new(self)
|
3361
|
+
end
|
2650
3362
|
|
2651
|
-
#
|
2652
|
-
#
|
3363
|
+
# Create an object namespace of all categorical related methods.
|
3364
|
+
#
|
3365
|
+
# @return [CatNameSpace]
|
3366
|
+
def cat
|
3367
|
+
CatNameSpace.new(self)
|
3368
|
+
end
|
2653
3369
|
|
2654
|
-
#
|
2655
|
-
#
|
3370
|
+
# Create an object namespace of all datetime related methods.
|
3371
|
+
#
|
3372
|
+
# @return [DateTimeNameSpace]
|
3373
|
+
def dt
|
3374
|
+
DateTimeNameSpace.new(self)
|
3375
|
+
end
|
2656
3376
|
|
2657
|
-
#
|
2658
|
-
#
|
3377
|
+
# Create an object namespace of all string related methods.
|
3378
|
+
#
|
3379
|
+
# @return [StringNameSpace]
|
3380
|
+
def str
|
3381
|
+
StringNameSpace.new(self)
|
3382
|
+
end
|
2659
3383
|
|
2660
|
-
#
|
2661
|
-
#
|
3384
|
+
# Create an object namespace of all struct related methods.
|
3385
|
+
#
|
3386
|
+
# @return [StructNameSpace]
|
3387
|
+
def struct
|
3388
|
+
StructNameSpace.new(self)
|
3389
|
+
end
|
2662
3390
|
|
2663
3391
|
private
|
2664
3392
|
|
@@ -2751,6 +3479,10 @@ module Polars
|
|
2751
3479
|
|
2752
3480
|
if ruby_dtype == Date
|
2753
3481
|
RbSeries.new_opt_date(name, values, strict)
|
3482
|
+
elsif ruby_dtype == Time
|
3483
|
+
RbSeries.new_opt_datetime(name, values, strict)
|
3484
|
+
elsif ruby_dtype == DateTime
|
3485
|
+
RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
|
2754
3486
|
else
|
2755
3487
|
raise Todo
|
2756
3488
|
end
|
@@ -2764,7 +3496,26 @@ module Polars
|
|
2764
3496
|
raise Todo
|
2765
3497
|
end
|
2766
3498
|
|
2767
|
-
|
3499
|
+
if value.is_a?(Array)
|
3500
|
+
count = 0
|
3501
|
+
equal_to_inner = true
|
3502
|
+
values.each do |lst|
|
3503
|
+
lst.each do |vl|
|
3504
|
+
equal_to_inner = vl.class == nested_dtype
|
3505
|
+
if !equal_to_inner || count > 50
|
3506
|
+
break
|
3507
|
+
end
|
3508
|
+
count += 1
|
3509
|
+
end
|
3510
|
+
end
|
3511
|
+
if equal_to_inner
|
3512
|
+
dtype = Utils.rb_type_to_dtype(nested_dtype)
|
3513
|
+
# TODO rescue and fallback to new_object
|
3514
|
+
return RbSeries.new_list(name, values, dtype)
|
3515
|
+
end
|
3516
|
+
end
|
3517
|
+
|
3518
|
+
RbSeries.new_object(name, values, strict)
|
2768
3519
|
else
|
2769
3520
|
constructor = rb_type_to_constructor(value.class)
|
2770
3521
|
constructor.call(name, values, strict)
|
@@ -2804,8 +3555,7 @@ module Polars
|
|
2804
3555
|
def rb_type_to_constructor(dtype)
|
2805
3556
|
RB_TYPE_TO_CONSTRUCTOR.fetch(dtype)
|
2806
3557
|
rescue KeyError
|
2807
|
-
|
2808
|
-
raise ArgumentError, "Cannot determine type"
|
3558
|
+
RbSeries.method(:new_object)
|
2809
3559
|
end
|
2810
3560
|
|
2811
3561
|
def _get_first_non_none(values)
|