polars-df 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +73 -3
- data/Cargo.toml +3 -0
- data/ext/polars/Cargo.toml +12 -1
- data/ext/polars/src/conversion.rs +80 -0
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +2 -2
- data/ext/polars/src/lazy/dsl.rs +98 -0
- data/ext/polars/src/lib.rs +34 -0
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +35 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +101 -4
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/expr.rb +3774 -58
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/group_by.rb +1 -0
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +8 -4
- data/lib/polars/lazy_functions.rb +126 -16
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/series.rb +802 -52
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +28 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -0
- metadata +8 -2
data/lib/polars/series.rb
CHANGED
@@ -3,9 +3,6 @@ module Polars
|
|
3
3
|
class Series
|
4
4
|
include ExprDispatch
|
5
5
|
|
6
|
-
# @private
|
7
|
-
attr_accessor :_s
|
8
|
-
|
9
6
|
# Create a new Series.
|
10
7
|
#
|
11
8
|
# @param name [String, Array, nil]
|
@@ -730,8 +727,43 @@ module Polars
|
|
730
727
|
Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
|
731
728
|
end
|
732
729
|
|
733
|
-
#
|
734
|
-
#
|
730
|
+
# Run an expression over a sliding window that increases `1` slot every iteration.
|
731
|
+
#
|
732
|
+
# @param expr [Expr]
|
733
|
+
# Expression to evaluate
|
734
|
+
# @param min_periods [Integer]
|
735
|
+
# Number of valid values there should be in the window before the expression
|
736
|
+
# is evaluated. valid values = `length - null_count`
|
737
|
+
# @param parallel [Boolean]
|
738
|
+
# Run in parallel. Don't do this in a groupby or another operation that
|
739
|
+
# already has much parallelization.
|
740
|
+
#
|
741
|
+
# @return [Series]
|
742
|
+
#
|
743
|
+
# @note
|
744
|
+
# This functionality is experimental and may change without it being considered a
|
745
|
+
# breaking change.
|
746
|
+
#
|
747
|
+
# @note
|
748
|
+
# This can be really slow as it can have `O(n^2)` complexity. Don't use this
|
749
|
+
# for operations that visit all elements.
|
750
|
+
#
|
751
|
+
# @example
|
752
|
+
# s = Polars::Series.new("values", [1, 2, 3, 4, 5])
|
753
|
+
# s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
|
754
|
+
# # =>
|
755
|
+
# # shape: (5,)
|
756
|
+
# # Series: 'values' [f64]
|
757
|
+
# # [
|
758
|
+
# # 0.0
|
759
|
+
# # -3.0
|
760
|
+
# # -8.0
|
761
|
+
# # -15.0
|
762
|
+
# # -24.0
|
763
|
+
# # ]
|
764
|
+
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
765
|
+
super
|
766
|
+
end
|
735
767
|
|
736
768
|
# Return a copy of the Series with a new alias/name.
|
737
769
|
#
|
@@ -1436,8 +1468,57 @@ module Polars
|
|
1436
1468
|
super
|
1437
1469
|
end
|
1438
1470
|
|
1439
|
-
#
|
1440
|
-
#
|
1471
|
+
# Check if elements of this Series are in the other Series.
|
1472
|
+
#
|
1473
|
+
# @return [Series]
|
1474
|
+
#
|
1475
|
+
# @example
|
1476
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1477
|
+
# s2 = Polars::Series.new("b", [2, 4])
|
1478
|
+
# s2.is_in(s)
|
1479
|
+
# # =>
|
1480
|
+
# # shape: (2,)
|
1481
|
+
# # Series: 'b' [bool]
|
1482
|
+
# # [
|
1483
|
+
# # true
|
1484
|
+
# # false
|
1485
|
+
# # ]
|
1486
|
+
#
|
1487
|
+
# @example
|
1488
|
+
# sets = Polars::Series.new("sets", [[1, 2, 3], [1, 2], [9, 10]])
|
1489
|
+
# # =>
|
1490
|
+
# # shape: (3,)
|
1491
|
+
# # Series: 'sets' [list]
|
1492
|
+
# # [
|
1493
|
+
# # [1, 2, 3]
|
1494
|
+
# # [1, 2]
|
1495
|
+
# # [9, 10]
|
1496
|
+
# # ]
|
1497
|
+
#
|
1498
|
+
# @example
|
1499
|
+
# optional_members = Polars::Series.new("optional_members", [1, 2, 3])
|
1500
|
+
# # =>
|
1501
|
+
# # shape: (3,)
|
1502
|
+
# # Series: 'optional_members' [i64]
|
1503
|
+
# # [
|
1504
|
+
# # 1
|
1505
|
+
# # 2
|
1506
|
+
# # 3
|
1507
|
+
# # ]
|
1508
|
+
#
|
1509
|
+
# @example
|
1510
|
+
# optional_members.is_in(sets)
|
1511
|
+
# # =>
|
1512
|
+
# # shape: (3,)
|
1513
|
+
# # Series: 'optional_members' [bool]
|
1514
|
+
# # [
|
1515
|
+
# # true
|
1516
|
+
# # true
|
1517
|
+
# # false
|
1518
|
+
# # ]
|
1519
|
+
def is_in(other)
|
1520
|
+
super
|
1521
|
+
end
|
1441
1522
|
|
1442
1523
|
# Get index values where Boolean Series evaluate `true`.
|
1443
1524
|
#
|
@@ -1909,8 +1990,28 @@ module Polars
|
|
1909
1990
|
super
|
1910
1991
|
end
|
1911
1992
|
|
1912
|
-
#
|
1913
|
-
#
|
1993
|
+
# Compute the dot/inner product between two Series.
|
1994
|
+
#
|
1995
|
+
# @param other [Object]
|
1996
|
+
# Series (or array) to compute dot product with.
|
1997
|
+
#
|
1998
|
+
# @return [Numeric]
|
1999
|
+
#
|
2000
|
+
# @example
|
2001
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2002
|
+
# s2 = Polars::Series.new("b", [4.0, 5.0, 6.0])
|
2003
|
+
# s.dot(s2)
|
2004
|
+
# # => 32.0
|
2005
|
+
def dot(other)
|
2006
|
+
if !other.is_a?(Series)
|
2007
|
+
other = Series.new(other)
|
2008
|
+
end
|
2009
|
+
if len != other.len
|
2010
|
+
n, m = len, other.len
|
2011
|
+
raise ArgumentError, "Series length mismatch: expected #{n}, found #{m}"
|
2012
|
+
end
|
2013
|
+
_s.dot(other._s)
|
2014
|
+
end
|
1914
2015
|
|
1915
2016
|
# Compute the most occurring value(s).
|
1916
2017
|
#
|
@@ -2277,38 +2378,509 @@ module Polars
|
|
2277
2378
|
Utils.wrap_s(_s.zip_with(mask._s, other._s))
|
2278
2379
|
end
|
2279
2380
|
|
2280
|
-
#
|
2281
|
-
#
|
2381
|
+
# Apply a rolling min (moving min) over the values in this array.
|
2382
|
+
#
|
2383
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2384
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2385
|
+
# `weights` vector. The resulting values will be aggregated to their minimum.
|
2386
|
+
#
|
2387
|
+
# @param window_size [Integer]
|
2388
|
+
# The length of the window.
|
2389
|
+
# @param weights [Array]
|
2390
|
+
# An optional slice with the same length as the window that will be multiplied
|
2391
|
+
# elementwise with the values in the window.
|
2392
|
+
# @param min_periods [Integer]
|
2393
|
+
# The number of values in the window that should be non-null before computing
|
2394
|
+
# a result. If nil, it will be set equal to the window size.
|
2395
|
+
# @param center [Boolean]
|
2396
|
+
# Set the labels at the center of the window
|
2397
|
+
#
|
2398
|
+
# @return [Series]
|
2399
|
+
#
|
2400
|
+
# @example
|
2401
|
+
# s = Polars::Series.new("a", [100, 200, 300, 400, 500])
|
2402
|
+
# s.rolling_min(3)
|
2403
|
+
# # =>
|
2404
|
+
# # shape: (5,)
|
2405
|
+
# # Series: 'a' [i64]
|
2406
|
+
# # [
|
2407
|
+
# # null
|
2408
|
+
# # null
|
2409
|
+
# # 100
|
2410
|
+
# # 200
|
2411
|
+
# # 300
|
2412
|
+
# # ]
|
2413
|
+
def rolling_min(
|
2414
|
+
window_size,
|
2415
|
+
weights: nil,
|
2416
|
+
min_periods: nil,
|
2417
|
+
center: false
|
2418
|
+
)
|
2419
|
+
to_frame
|
2420
|
+
.select(
|
2421
|
+
Polars.col(name).rolling_min(
|
2422
|
+
window_size,
|
2423
|
+
weights: weights,
|
2424
|
+
min_periods: min_periods,
|
2425
|
+
center: center
|
2426
|
+
)
|
2427
|
+
)
|
2428
|
+
.to_series
|
2429
|
+
end
|
2282
2430
|
|
2283
|
-
#
|
2284
|
-
#
|
2431
|
+
# Apply a rolling max (moving max) over the values in this array.
|
2432
|
+
#
|
2433
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2434
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2435
|
+
# `weights` vector. The resulting values will be aggregated to their maximum.
|
2436
|
+
#
|
2437
|
+
# @param window_size [Integer]
|
2438
|
+
# The length of the window.
|
2439
|
+
# @param weights [Array]
|
2440
|
+
# An optional slice with the same length as the window that will be multiplied
|
2441
|
+
# elementwise with the values in the window.
|
2442
|
+
# @param min_periods [Integer]
|
2443
|
+
# The number of values in the window that should be non-null before computing
|
2444
|
+
# a result. If nil, it will be set equal to the window size.
|
2445
|
+
# @param center [Boolean]
|
2446
|
+
# Set the labels at the center of the window
|
2447
|
+
#
|
2448
|
+
# @return [Series]
|
2449
|
+
#
|
2450
|
+
# @example
|
2451
|
+
# s = Polars::Series.new("a", [100, 200, 300, 400, 500])
|
2452
|
+
# s.rolling_max(2)
|
2453
|
+
# # =>
|
2454
|
+
# # shape: (5,)
|
2455
|
+
# # Series: 'a' [i64]
|
2456
|
+
# # [
|
2457
|
+
# # null
|
2458
|
+
# # 200
|
2459
|
+
# # 300
|
2460
|
+
# # 400
|
2461
|
+
# # 500
|
2462
|
+
# # ]
|
2463
|
+
def rolling_max(
|
2464
|
+
window_size,
|
2465
|
+
weights: nil,
|
2466
|
+
min_periods: nil,
|
2467
|
+
center: false
|
2468
|
+
)
|
2469
|
+
to_frame
|
2470
|
+
.select(
|
2471
|
+
Polars.col(name).rolling_max(
|
2472
|
+
window_size,
|
2473
|
+
weights: weights,
|
2474
|
+
min_periods: min_periods,
|
2475
|
+
center: center
|
2476
|
+
)
|
2477
|
+
)
|
2478
|
+
.to_series
|
2479
|
+
end
|
2285
2480
|
|
2286
|
-
#
|
2287
|
-
#
|
2481
|
+
# Apply a rolling mean (moving mean) over the values in this array.
|
2482
|
+
#
|
2483
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2484
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2485
|
+
# `weights` vector. The resulting values will be aggregated to their mean.
|
2486
|
+
#
|
2487
|
+
# @param window_size [Integer]
|
2488
|
+
# The length of the window.
|
2489
|
+
# @param weights [Array]
|
2490
|
+
# An optional slice with the same length as the window that will be multiplied
|
2491
|
+
# elementwise with the values in the window.
|
2492
|
+
# @param min_periods [Integer]
|
2493
|
+
# The number of values in the window that should be non-null before computing
|
2494
|
+
# a result. If nil, it will be set equal to the window size.
|
2495
|
+
# @param center [Boolean]
|
2496
|
+
# Set the labels at the center of the window
|
2497
|
+
#
|
2498
|
+
# @return [Series]
|
2499
|
+
#
|
2500
|
+
# @example
|
2501
|
+
# s = Polars::Series.new("a", [100, 200, 300, 400, 500])
|
2502
|
+
# s.rolling_mean(2)
|
2503
|
+
# # =>
|
2504
|
+
# # shape: (5,)
|
2505
|
+
# # Series: 'a' [f64]
|
2506
|
+
# # [
|
2507
|
+
# # null
|
2508
|
+
# # 150.0
|
2509
|
+
# # 250.0
|
2510
|
+
# # 350.0
|
2511
|
+
# # 450.0
|
2512
|
+
# # ]
|
2513
|
+
def rolling_mean(
|
2514
|
+
window_size,
|
2515
|
+
weights: nil,
|
2516
|
+
min_periods: nil,
|
2517
|
+
center: false
|
2518
|
+
)
|
2519
|
+
to_frame
|
2520
|
+
.select(
|
2521
|
+
Polars.col(name).rolling_mean(
|
2522
|
+
window_size,
|
2523
|
+
weights: weights,
|
2524
|
+
min_periods: min_periods,
|
2525
|
+
center: center
|
2526
|
+
)
|
2527
|
+
)
|
2528
|
+
.to_series
|
2529
|
+
end
|
2288
2530
|
|
2289
|
-
#
|
2290
|
-
#
|
2531
|
+
# Apply a rolling sum (moving sum) over the values in this array.
|
2532
|
+
#
|
2533
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2534
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2535
|
+
# `weights` vector. The resulting values will be aggregated to their sum.
|
2536
|
+
#
|
2537
|
+
# @param window_size [Integer]
|
2538
|
+
# The length of the window.
|
2539
|
+
# @param weights [Array]
|
2540
|
+
# An optional slice with the same length as the window that will be multiplied
|
2541
|
+
# elementwise with the values in the window.
|
2542
|
+
# @param min_periods [Integer]
|
2543
|
+
# The number of values in the window that should be non-null before computing
|
2544
|
+
# a result. If nil, it will be set equal to the window size.
|
2545
|
+
# @param center [Boolean]
|
2546
|
+
# Set the labels at the center of the window
|
2547
|
+
#
|
2548
|
+
# @return [Series]
|
2549
|
+
#
|
2550
|
+
# @example
|
2551
|
+
# s = Polars::Series.new("a", [1, 2, 3, 4, 5])
|
2552
|
+
# s.rolling_sum(2)
|
2553
|
+
# # =>
|
2554
|
+
# # shape: (5,)
|
2555
|
+
# # Series: 'a' [i64]
|
2556
|
+
# # [
|
2557
|
+
# # null
|
2558
|
+
# # 3
|
2559
|
+
# # 5
|
2560
|
+
# # 7
|
2561
|
+
# # 9
|
2562
|
+
# # ]
|
2563
|
+
def rolling_sum(
|
2564
|
+
window_size,
|
2565
|
+
weights: nil,
|
2566
|
+
min_periods: nil,
|
2567
|
+
center: false
|
2568
|
+
)
|
2569
|
+
to_frame
|
2570
|
+
.select(
|
2571
|
+
Polars.col(name).rolling_sum(
|
2572
|
+
window_size,
|
2573
|
+
weights: weights,
|
2574
|
+
min_periods: min_periods,
|
2575
|
+
center: center
|
2576
|
+
)
|
2577
|
+
)
|
2578
|
+
.to_series
|
2579
|
+
end
|
2291
2580
|
|
2292
|
-
#
|
2293
|
-
#
|
2581
|
+
# Compute a rolling std dev.
|
2582
|
+
#
|
2583
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2584
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2585
|
+
# `weights` vector. The resulting values will be aggregated to their standard deviation.
|
2586
|
+
#
|
2587
|
+
# @param window_size [Integer]
|
2588
|
+
# The length of the window.
|
2589
|
+
# @param weights [Array]
|
2590
|
+
# An optional slice with the same length as the window that will be multiplied
|
2591
|
+
# elementwise with the values in the window.
|
2592
|
+
# @param min_periods [Integer]
|
2593
|
+
# The number of values in the window that should be non-null before computing
|
2594
|
+
# a result. If nil, it will be set equal to the window size.
|
2595
|
+
# @param center [Boolean]
|
2596
|
+
# Set the labels at the center of the window
|
2597
|
+
#
|
2598
|
+
# @return [Series]
|
2599
|
+
#
|
2600
|
+
# @example
|
2601
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
|
2602
|
+
# s.rolling_std(3)
|
2603
|
+
# # =>
|
2604
|
+
# # shape: (6,)
|
2605
|
+
# # Series: 'a' [f64]
|
2606
|
+
# # [
|
2607
|
+
# # null
|
2608
|
+
# # null
|
2609
|
+
# # 1.0
|
2610
|
+
# # 1.0
|
2611
|
+
# # 1.527525
|
2612
|
+
# # 2.0
|
2613
|
+
# # ]
|
2614
|
+
def rolling_std(
|
2615
|
+
window_size,
|
2616
|
+
weights: nil,
|
2617
|
+
min_periods: nil,
|
2618
|
+
center: false
|
2619
|
+
)
|
2620
|
+
to_frame
|
2621
|
+
.select(
|
2622
|
+
Polars.col(name).rolling_std(
|
2623
|
+
window_size,
|
2624
|
+
weights: weights,
|
2625
|
+
min_periods: min_periods,
|
2626
|
+
center: center
|
2627
|
+
)
|
2628
|
+
)
|
2629
|
+
.to_series
|
2630
|
+
end
|
2294
2631
|
|
2295
|
-
#
|
2296
|
-
#
|
2632
|
+
# Compute a rolling variance.
|
2633
|
+
#
|
2634
|
+
# A window of length `window_size` will traverse the array. The values that fill
|
2635
|
+
# this window will (optionally) be multiplied with the weights given by the
|
2636
|
+
# `weights` vector. The resulting values will be aggregated to their variance.
|
2637
|
+
#
|
2638
|
+
# @param window_size [Integer]
|
2639
|
+
# The length of the window.
|
2640
|
+
# @param weights [Array]
|
2641
|
+
# An optional slice with the same length as the window that will be multiplied
|
2642
|
+
# elementwise with the values in the window.
|
2643
|
+
# @param min_periods [Integer]
|
2644
|
+
# The number of values in the window that should be non-null before computing
|
2645
|
+
# a result. If nil, it will be set equal to the window size.
|
2646
|
+
# @param center [Boolean]
|
2647
|
+
# Set the labels at the center of the window
|
2648
|
+
#
|
2649
|
+
# @return [Series]
|
2650
|
+
#
|
2651
|
+
# @example
|
2652
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
|
2653
|
+
# s.rolling_var(3)
|
2654
|
+
# # =>
|
2655
|
+
# # shape: (6,)
|
2656
|
+
# # Series: 'a' [f64]
|
2657
|
+
# # [
|
2658
|
+
# # null
|
2659
|
+
# # null
|
2660
|
+
# # 1.0
|
2661
|
+
# # 1.0
|
2662
|
+
# # 2.333333
|
2663
|
+
# # 4.0
|
2664
|
+
# # ]
|
2665
|
+
def rolling_var(
|
2666
|
+
window_size,
|
2667
|
+
weights: nil,
|
2668
|
+
min_periods: nil,
|
2669
|
+
center: false
|
2670
|
+
)
|
2671
|
+
to_frame
|
2672
|
+
.select(
|
2673
|
+
Polars.col(name).rolling_var(
|
2674
|
+
window_size,
|
2675
|
+
weights: weights,
|
2676
|
+
min_periods: min_periods,
|
2677
|
+
center: center
|
2678
|
+
)
|
2679
|
+
)
|
2680
|
+
.to_series
|
2681
|
+
end
|
2297
2682
|
|
2298
2683
|
# def rolling_apply
|
2299
2684
|
# end
|
2300
2685
|
|
2301
|
-
#
|
2302
|
-
#
|
2686
|
+
# Compute a rolling median.
|
2687
|
+
#
|
2688
|
+
# @param window_size [Integer]
|
2689
|
+
# The length of the window.
|
2690
|
+
# @param weights [Array]
|
2691
|
+
# An optional slice with the same length as the window that will be multiplied
|
2692
|
+
# elementwise with the values in the window.
|
2693
|
+
# @param min_periods [Integer]
|
2694
|
+
# The number of values in the window that should be non-null before computing
|
2695
|
+
# a result. If nil, it will be set equal to the window size.
|
2696
|
+
# @param center [Boolean]
|
2697
|
+
# Set the labels at the center of the window
|
2698
|
+
#
|
2699
|
+
# @return [Series]
|
2700
|
+
#
|
2701
|
+
# @example
|
2702
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
|
2703
|
+
# s.rolling_median(3)
|
2704
|
+
# # =>
|
2705
|
+
# # shape: (6,)
|
2706
|
+
# # Series: 'a' [f64]
|
2707
|
+
# # [
|
2708
|
+
# # null
|
2709
|
+
# # null
|
2710
|
+
# # 2.0
|
2711
|
+
# # 3.0
|
2712
|
+
# # 4.0
|
2713
|
+
# # 6.0
|
2714
|
+
# # ]
|
2715
|
+
def rolling_median(
|
2716
|
+
window_size,
|
2717
|
+
weights: nil,
|
2718
|
+
min_periods: nil,
|
2719
|
+
center: false
|
2720
|
+
)
|
2721
|
+
if min_periods.nil?
|
2722
|
+
min_periods = window_size
|
2723
|
+
end
|
2303
2724
|
|
2304
|
-
|
2305
|
-
|
2725
|
+
to_frame
|
2726
|
+
.select(
|
2727
|
+
Polars.col(name).rolling_median(
|
2728
|
+
window_size,
|
2729
|
+
weights: weights,
|
2730
|
+
min_periods: min_periods,
|
2731
|
+
center: center
|
2732
|
+
)
|
2733
|
+
)
|
2734
|
+
.to_series
|
2735
|
+
end
|
2306
2736
|
|
2307
|
-
#
|
2308
|
-
#
|
2737
|
+
# Compute a rolling quantile.
|
2738
|
+
#
|
2739
|
+
# @param quantile [Float]
|
2740
|
+
# Quantile between 0.0 and 1.0.
|
2741
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
2742
|
+
# Interpolation method.
|
2743
|
+
# @param window_size [Integer]
|
2744
|
+
# The length of the window.
|
2745
|
+
# @param weights [Array]
|
2746
|
+
# An optional slice with the same length as the window that will be multiplied
|
2747
|
+
# elementwise with the values in the window.
|
2748
|
+
# @param min_periods [Integer]
|
2749
|
+
# The number of values in the window that should be non-null before computing
|
2750
|
+
# a result. If nil, it will be set equal to the window size.
|
2751
|
+
# @param center [Boolean]
|
2752
|
+
# Set the labels at the center of the window
|
2753
|
+
#
|
2754
|
+
# @return [Series]
|
2755
|
+
#
|
2756
|
+
# @example
|
2757
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
|
2758
|
+
# s.rolling_quantile(0.33, window_size: 3)
|
2759
|
+
# # =>
|
2760
|
+
# # shape: (6,)
|
2761
|
+
# # Series: 'a' [f64]
|
2762
|
+
# # [
|
2763
|
+
# # null
|
2764
|
+
# # null
|
2765
|
+
# # 1.0
|
2766
|
+
# # 2.0
|
2767
|
+
# # 3.0
|
2768
|
+
# # 4.0
|
2769
|
+
# # ]
|
2770
|
+
#
|
2771
|
+
# @example
|
2772
|
+
# s.rolling_quantile(0.33, interpolation: "linear", window_size: 3)
|
2773
|
+
# # =>
|
2774
|
+
# # shape: (6,)
|
2775
|
+
# # Series: 'a' [f64]
|
2776
|
+
# # [
|
2777
|
+
# # null
|
2778
|
+
# # null
|
2779
|
+
# # 1.66
|
2780
|
+
# # 2.66
|
2781
|
+
# # 3.66
|
2782
|
+
# # 5.32
|
2783
|
+
# # ]
|
2784
|
+
def rolling_quantile(
|
2785
|
+
quantile,
|
2786
|
+
interpolation: "nearest",
|
2787
|
+
window_size: 2,
|
2788
|
+
weights: nil,
|
2789
|
+
min_periods: nil,
|
2790
|
+
center: false
|
2791
|
+
)
|
2792
|
+
if min_periods.nil?
|
2793
|
+
min_periods = window_size
|
2794
|
+
end
|
2309
2795
|
|
2310
|
-
|
2311
|
-
|
2796
|
+
to_frame
|
2797
|
+
.select(
|
2798
|
+
Polars.col(name).rolling_quantile(
|
2799
|
+
quantile,
|
2800
|
+
interpolation: interpolation,
|
2801
|
+
window_size: window_size,
|
2802
|
+
weights: weights,
|
2803
|
+
min_periods: min_periods,
|
2804
|
+
center: center
|
2805
|
+
)
|
2806
|
+
)
|
2807
|
+
.to_series
|
2808
|
+
end
|
2809
|
+
|
2810
|
+
# Compute a rolling skew.
|
2811
|
+
#
|
2812
|
+
# @param window_size [Integer]
|
2813
|
+
# Integer size of the rolling window.
|
2814
|
+
# @param bias [Boolean]
|
2815
|
+
# If false, the calculations are corrected for statistical bias.
|
2816
|
+
#
|
2817
|
+
# @return [Series]
|
2818
|
+
#
|
2819
|
+
# @example
|
2820
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
|
2821
|
+
# s.rolling_skew(3)
|
2822
|
+
# # =>
|
2823
|
+
# # shape: (6,)
|
2824
|
+
# # Series: 'a' [f64]
|
2825
|
+
# # [
|
2826
|
+
# # null
|
2827
|
+
# # null
|
2828
|
+
# # 0.0
|
2829
|
+
# # 0.0
|
2830
|
+
# # 0.381802
|
2831
|
+
# # 0.0
|
2832
|
+
# # ]
|
2833
|
+
def rolling_skew(window_size, bias: true)
|
2834
|
+
super
|
2835
|
+
end
|
2836
|
+
|
2837
|
+
# Sample from this Series.
|
2838
|
+
#
|
2839
|
+
# @param n [Integer]
|
2840
|
+
# Number of items to return. Cannot be used with `frac`. Defaults to 1 if
|
2841
|
+
# `frac` is nil.
|
2842
|
+
# @param frac [Float]
|
2843
|
+
# Fraction of items to return. Cannot be used with `n`.
|
2844
|
+
# @param with_replacement [Boolean]
|
2845
|
+
# Allow values to be sampled more than once.
|
2846
|
+
# @param shuffle [Boolean]
|
2847
|
+
# Shuffle the order of sampled data points.
|
2848
|
+
# @param seed [Integer]
|
2849
|
+
# Seed for the random number generator. If set to nil (default), a random
|
2850
|
+
# seed is used.
|
2851
|
+
#
|
2852
|
+
# @return [Series]
|
2853
|
+
#
|
2854
|
+
# @example
|
2855
|
+
# s = Polars::Series.new("a", [1, 2, 3, 4, 5])
|
2856
|
+
# s.sample(n: 2, seed: 0)
|
2857
|
+
# # =>
|
2858
|
+
# # shape: (2,)
|
2859
|
+
# # Series: 'a' [i64]
|
2860
|
+
# # [
|
2861
|
+
# # 1
|
2862
|
+
# # 5
|
2863
|
+
# # ]
|
2864
|
+
def sample(
|
2865
|
+
n: nil,
|
2866
|
+
frac: nil,
|
2867
|
+
with_replacement: false,
|
2868
|
+
shuffle: false,
|
2869
|
+
seed: nil
|
2870
|
+
)
|
2871
|
+
if !n.nil? && !frac.nil?
|
2872
|
+
raise ArgumentError, "cannot specify both `n` and `frac`"
|
2873
|
+
end
|
2874
|
+
|
2875
|
+
if n.nil? && !frac.nil?
|
2876
|
+
return Utils.wrap_s(_s.sample_frac(frac, with_replacement, shuffle, seed))
|
2877
|
+
end
|
2878
|
+
|
2879
|
+
if n.nil?
|
2880
|
+
n = 1
|
2881
|
+
end
|
2882
|
+
Utils.wrap_s(_s.sample_n(n, with_replacement, shuffle, seed))
|
2883
|
+
end
|
2312
2884
|
|
2313
2885
|
# Get a boolean mask of the local maximum peaks.
|
2314
2886
|
#
|
@@ -2425,8 +2997,61 @@ module Polars
|
|
2425
2997
|
super
|
2426
2998
|
end
|
2427
2999
|
|
2428
|
-
#
|
2429
|
-
#
|
3000
|
+
# Assign ranks to data, dealing with ties appropriately.
|
3001
|
+
#
|
3002
|
+
# @param method ["average", "min", "max", "dense", "ordinal", "random"]
|
3003
|
+
# The method used to assign ranks to tied elements.
|
3004
|
+
# The following methods are available (default is 'average'):
|
3005
|
+
#
|
3006
|
+
# - 'average' : The average of the ranks that would have been assigned to
|
3007
|
+
# all the tied values is assigned to each value.
|
3008
|
+
# - 'min' : The minimum of the ranks that would have been assigned to all
|
3009
|
+
# the tied values is assigned to each value. (This is also referred to
|
3010
|
+
# as "competition" ranking.)
|
3011
|
+
# - 'max' : The maximum of the ranks that would have been assigned to all
|
3012
|
+
# the tied values is assigned to each value.
|
3013
|
+
# - 'dense' : Like 'min', but the rank of the next highest element is
|
3014
|
+
# assigned the rank immediately after those assigned to the tied
|
3015
|
+
# elements.
|
3016
|
+
# - 'ordinal' : All values are given a distinct rank, corresponding to
|
3017
|
+
# the order that the values occur in the Series.
|
3018
|
+
# - 'random' : Like 'ordinal', but the rank for ties is not dependent
|
3019
|
+
# on the order that the values occur in the Series.
|
3020
|
+
# @param reverse [Boolean]
|
3021
|
+
# Reverse the operation.
|
3022
|
+
#
|
3023
|
+
# @return [Series]
|
3024
|
+
#
|
3025
|
+
# @example The 'average' method:
|
3026
|
+
# s = Polars::Series.new("a", [3, 6, 1, 1, 6])
|
3027
|
+
# s.rank
|
3028
|
+
# # =>
|
3029
|
+
# # shape: (5,)
|
3030
|
+
# # Series: 'a' [f32]
|
3031
|
+
# # [
|
3032
|
+
# # 3.0
|
3033
|
+
# # 4.5
|
3034
|
+
# # 1.5
|
3035
|
+
# # 1.5
|
3036
|
+
# # 4.5
|
3037
|
+
# # ]
|
3038
|
+
#
|
3039
|
+
# @example The 'ordinal' method:
|
3040
|
+
# s = Polars::Series.new("a", [3, 6, 1, 1, 6])
|
3041
|
+
# s.rank(method: "ordinal")
|
3042
|
+
# # =>
|
3043
|
+
# # shape: (5,)
|
3044
|
+
# # Series: 'a' [u32]
|
3045
|
+
# # [
|
3046
|
+
# # 3
|
3047
|
+
# # 4
|
3048
|
+
# # 1
|
3049
|
+
# # 2
|
3050
|
+
# # 5
|
3051
|
+
# # ]
|
3052
|
+
def rank(method: "average", reverse: false)
|
3053
|
+
super
|
3054
|
+
end
|
2430
3055
|
|
2431
3056
|
# Calculate the n-th discrete difference.
|
2432
3057
|
#
|
@@ -2440,8 +3065,56 @@ module Polars
|
|
2440
3065
|
super
|
2441
3066
|
end
|
2442
3067
|
|
2443
|
-
#
|
2444
|
-
#
|
3068
|
+
# Computes percentage change between values.
|
3069
|
+
#
|
3070
|
+
# Percentage change (as fraction) between current element and most-recent
|
3071
|
+
# non-null element at least `n` period(s) before the current element.
|
3072
|
+
#
|
3073
|
+
# Computes the change from the previous row by default.
|
3074
|
+
#
|
3075
|
+
# @param n [Integer]
|
3076
|
+
# Number of periods to shift when computing the percent change.
|
3077
|
+
#
|
3078
|
+
# @return [Series]
|
3079
|
+
#
|
3080
|
+
# @example
|
3081
|
+
# Polars::Series.new(0..9).pct_change
|
3082
|
+
# # =>
|
3083
|
+
# # shape: (10,)
|
3084
|
+
# # Series: '' [f64]
|
3085
|
+
# # [
|
3086
|
+
# # null
|
3087
|
+
# # inf
|
3088
|
+
# # 1.0
|
3089
|
+
# # 0.5
|
3090
|
+
# # 0.333333
|
3091
|
+
# # 0.25
|
3092
|
+
# # 0.2
|
3093
|
+
# # 0.166667
|
3094
|
+
# # 0.142857
|
3095
|
+
# # 0.125
|
3096
|
+
# # ]
|
3097
|
+
#
|
3098
|
+
# @example
|
3099
|
+
# Polars::Series.new([1, 2, 4, 8, 16, 32, 64, 128, 256, 512]).pct_change(n: 2)
|
3100
|
+
# # =>
|
3101
|
+
# # shape: (10,)
|
3102
|
+
# # Series: '' [f64]
|
3103
|
+
# # [
|
3104
|
+
# # null
|
3105
|
+
# # null
|
3106
|
+
# # 3.0
|
3107
|
+
# # 3.0
|
3108
|
+
# # 3.0
|
3109
|
+
# # 3.0
|
3110
|
+
# # 3.0
|
3111
|
+
# # 3.0
|
3112
|
+
# # 3.0
|
3113
|
+
# # 3.0
|
3114
|
+
# # ]
|
3115
|
+
def pct_change(n: 1)
|
3116
|
+
super
|
3117
|
+
end
|
2445
3118
|
|
2446
3119
|
# Compute the sample skewness of a data set.
|
2447
3120
|
#
|
@@ -2571,14 +3244,49 @@ module Polars
|
|
2571
3244
|
super
|
2572
3245
|
end
|
2573
3246
|
|
2574
|
-
#
|
2575
|
-
#
|
3247
|
+
# Exponentially-weighted moving average.
|
3248
|
+
#
|
3249
|
+
# @return [Series]
|
3250
|
+
def ewm_mean(
|
3251
|
+
com: nil,
|
3252
|
+
span: nil,
|
3253
|
+
half_life: nil,
|
3254
|
+
alpha: nil,
|
3255
|
+
adjust: true,
|
3256
|
+
min_periods: 1
|
3257
|
+
)
|
3258
|
+
super
|
3259
|
+
end
|
2576
3260
|
|
2577
|
-
#
|
2578
|
-
#
|
3261
|
+
# Exponentially-weighted moving standard deviation.
|
3262
|
+
#
|
3263
|
+
# @return [Series]
|
3264
|
+
def ewm_std(
|
3265
|
+
com: nil,
|
3266
|
+
span: nil,
|
3267
|
+
half_life: nil,
|
3268
|
+
alpha: nil,
|
3269
|
+
adjust: true,
|
3270
|
+
bias: false,
|
3271
|
+
min_periods: 1
|
3272
|
+
)
|
3273
|
+
super
|
3274
|
+
end
|
2579
3275
|
|
2580
|
-
#
|
2581
|
-
#
|
3276
|
+
# Exponentially-weighted moving variance.
|
3277
|
+
#
|
3278
|
+
# @return [Series]
|
3279
|
+
def ewm_var(
|
3280
|
+
com: nil,
|
3281
|
+
span: nil,
|
3282
|
+
half_life: nil,
|
3283
|
+
alpha: nil,
|
3284
|
+
adjust: true,
|
3285
|
+
bias: false,
|
3286
|
+
min_periods: 1
|
3287
|
+
)
|
3288
|
+
super
|
3289
|
+
end
|
2582
3290
|
|
2583
3291
|
# Extend the Series with given number of values.
|
2584
3292
|
#
|
@@ -2645,20 +3353,40 @@ module Polars
|
|
2645
3353
|
super
|
2646
3354
|
end
|
2647
3355
|
|
2648
|
-
#
|
2649
|
-
#
|
3356
|
+
# Create an object namespace of all list related methods.
|
3357
|
+
#
|
3358
|
+
# @return [ListNameSpace]
|
3359
|
+
def arr
|
3360
|
+
ListNameSpace.new(self)
|
3361
|
+
end
|
2650
3362
|
|
2651
|
-
#
|
2652
|
-
#
|
3363
|
+
# Create an object namespace of all categorical related methods.
|
3364
|
+
#
|
3365
|
+
# @return [CatNameSpace]
|
3366
|
+
def cat
|
3367
|
+
CatNameSpace.new(self)
|
3368
|
+
end
|
2653
3369
|
|
2654
|
-
#
|
2655
|
-
#
|
3370
|
+
# Create an object namespace of all datetime related methods.
|
3371
|
+
#
|
3372
|
+
# @return [DateTimeNameSpace]
|
3373
|
+
def dt
|
3374
|
+
DateTimeNameSpace.new(self)
|
3375
|
+
end
|
2656
3376
|
|
2657
|
-
#
|
2658
|
-
#
|
3377
|
+
# Create an object namespace of all string related methods.
|
3378
|
+
#
|
3379
|
+
# @return [StringNameSpace]
|
3380
|
+
def str
|
3381
|
+
StringNameSpace.new(self)
|
3382
|
+
end
|
2659
3383
|
|
2660
|
-
#
|
2661
|
-
#
|
3384
|
+
# Create an object namespace of all struct related methods.
|
3385
|
+
#
|
3386
|
+
# @return [StructNameSpace]
|
3387
|
+
def struct
|
3388
|
+
StructNameSpace.new(self)
|
3389
|
+
end
|
2662
3390
|
|
2663
3391
|
private
|
2664
3392
|
|
@@ -2751,6 +3479,10 @@ module Polars
|
|
2751
3479
|
|
2752
3480
|
if ruby_dtype == Date
|
2753
3481
|
RbSeries.new_opt_date(name, values, strict)
|
3482
|
+
elsif ruby_dtype == Time
|
3483
|
+
RbSeries.new_opt_datetime(name, values, strict)
|
3484
|
+
elsif ruby_dtype == DateTime
|
3485
|
+
RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
|
2754
3486
|
else
|
2755
3487
|
raise Todo
|
2756
3488
|
end
|
@@ -2764,7 +3496,26 @@ module Polars
|
|
2764
3496
|
raise Todo
|
2765
3497
|
end
|
2766
3498
|
|
2767
|
-
|
3499
|
+
if value.is_a?(Array)
|
3500
|
+
count = 0
|
3501
|
+
equal_to_inner = true
|
3502
|
+
values.each do |lst|
|
3503
|
+
lst.each do |vl|
|
3504
|
+
equal_to_inner = vl.class == nested_dtype
|
3505
|
+
if !equal_to_inner || count > 50
|
3506
|
+
break
|
3507
|
+
end
|
3508
|
+
count += 1
|
3509
|
+
end
|
3510
|
+
end
|
3511
|
+
if equal_to_inner
|
3512
|
+
dtype = Utils.rb_type_to_dtype(nested_dtype)
|
3513
|
+
# TODO rescue and fallback to new_object
|
3514
|
+
return RbSeries.new_list(name, values, dtype)
|
3515
|
+
end
|
3516
|
+
end
|
3517
|
+
|
3518
|
+
RbSeries.new_object(name, values, strict)
|
2768
3519
|
else
|
2769
3520
|
constructor = rb_type_to_constructor(value.class)
|
2770
3521
|
constructor.call(name, values, strict)
|
@@ -2804,8 +3555,7 @@ module Polars
|
|
2804
3555
|
def rb_type_to_constructor(dtype)
|
2805
3556
|
RB_TYPE_TO_CONSTRUCTOR.fetch(dtype)
|
2806
3557
|
rescue KeyError
|
2807
|
-
|
2808
|
-
raise ArgumentError, "Cannot determine type"
|
3558
|
+
RbSeries.method(:new_object)
|
2809
3559
|
end
|
2810
3560
|
|
2811
3561
|
def _get_first_non_none(values)
|