polars-df 0.1.3 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
data/lib/polars/series.rb CHANGED
@@ -3,9 +3,6 @@ module Polars
3
3
  class Series
4
4
  include ExprDispatch
5
5
 
6
- # @private
7
- attr_accessor :_s
8
-
9
6
  # Create a new Series.
10
7
  #
11
8
  # @param name [String, Array, nil]
@@ -26,7 +23,7 @@ module Polars
26
23
  # @example Constructing a Series by specifying name and values positionally:
27
24
  # s = Polars::Series.new("a", [1, 2, 3])
28
25
  #
29
- # @example Notice that the dtype is automatically inferred as a polars Int64:
26
+ # @example Notice that the dtype is automatically inferred as a polars `:i64`:
30
27
  # s.dtype
31
28
  # # => :i64
32
29
  #
@@ -48,6 +45,11 @@ module Polars
48
45
 
49
46
  name = "" if name.nil?
50
47
 
48
+ # TODO improve
49
+ if values.is_a?(Range) && values.begin.is_a?(String)
50
+ values = values.to_a
51
+ end
52
+
51
53
  if values.nil?
52
54
  self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
53
55
  elsif values.is_a?(Series)
@@ -730,8 +732,43 @@ module Polars
730
732
  Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
731
733
  end
732
734
 
733
- # def cumulative_eval
734
- # end
735
+ # Run an expression over a sliding window that increases `1` slot every iteration.
736
+ #
737
+ # @param expr [Expr]
738
+ # Expression to evaluate
739
+ # @param min_periods [Integer]
740
+ # Number of valid values there should be in the window before the expression
741
+ # is evaluated. valid values = `length - null_count`
742
+ # @param parallel [Boolean]
743
+ # Run in parallel. Don't do this in a groupby or another operation that
744
+ # already has much parallelization.
745
+ #
746
+ # @return [Series]
747
+ #
748
+ # @note
749
+ # This functionality is experimental and may change without it being considered a
750
+ # breaking change.
751
+ #
752
+ # @note
753
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
754
+ # for operations that visit all elements.
755
+ #
756
+ # @example
757
+ # s = Polars::Series.new("values", [1, 2, 3, 4, 5])
758
+ # s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
759
+ # # =>
760
+ # # shape: (5,)
761
+ # # Series: 'values' [f64]
762
+ # # [
763
+ # # 0.0
764
+ # # -3.0
765
+ # # -8.0
766
+ # # -15.0
767
+ # # -24.0
768
+ # # ]
769
+ def cumulative_eval(expr, min_periods: 1, parallel: false)
770
+ super
771
+ end
735
772
 
736
773
  # Return a copy of the Series with a new alias/name.
737
774
  #
@@ -1436,8 +1473,57 @@ module Polars
1436
1473
  super
1437
1474
  end
1438
1475
 
1439
- # def is_in
1440
- # end
1476
+ # Check if elements of this Series are in the other Series.
1477
+ #
1478
+ # @return [Series]
1479
+ #
1480
+ # @example
1481
+ # s = Polars::Series.new("a", [1, 2, 3])
1482
+ # s2 = Polars::Series.new("b", [2, 4])
1483
+ # s2.is_in(s)
1484
+ # # =>
1485
+ # # shape: (2,)
1486
+ # # Series: 'b' [bool]
1487
+ # # [
1488
+ # # true
1489
+ # # false
1490
+ # # ]
1491
+ #
1492
+ # @example
1493
+ # sets = Polars::Series.new("sets", [[1, 2, 3], [1, 2], [9, 10]])
1494
+ # # =>
1495
+ # # shape: (3,)
1496
+ # # Series: 'sets' [list]
1497
+ # # [
1498
+ # # [1, 2, 3]
1499
+ # # [1, 2]
1500
+ # # [9, 10]
1501
+ # # ]
1502
+ #
1503
+ # @example
1504
+ # optional_members = Polars::Series.new("optional_members", [1, 2, 3])
1505
+ # # =>
1506
+ # # shape: (3,)
1507
+ # # Series: 'optional_members' [i64]
1508
+ # # [
1509
+ # # 1
1510
+ # # 2
1511
+ # # 3
1512
+ # # ]
1513
+ #
1514
+ # @example
1515
+ # optional_members.is_in(sets)
1516
+ # # =>
1517
+ # # shape: (3,)
1518
+ # # Series: 'optional_members' [bool]
1519
+ # # [
1520
+ # # true
1521
+ # # true
1522
+ # # false
1523
+ # # ]
1524
+ def is_in(other)
1525
+ super
1526
+ end
1441
1527
 
1442
1528
  # Get index values where Boolean Series evaluate `true`.
1443
1529
  #
@@ -1587,8 +1673,32 @@ module Polars
1587
1673
  super
1588
1674
  end
1589
1675
 
1590
- # def to_physical
1591
- # end
1676
+ # Cast to physical representation of the logical dtype.
1677
+ #
1678
+ # - `:date` -> `:i32`
1679
+ # - `:datetime` -> `:i64`
1680
+ # - `:time` -> `:i64`
1681
+ # - `:duration` -> `:i64`
1682
+ # - `:cat` -> `:u32`
1683
+ # - other data types will be left unchanged.
1684
+ #
1685
+ # @return [Series]
1686
+ #
1687
+ # @example
1688
+ # s = Polars::Series.new("values", ["a", nil, "x", "a"])
1689
+ # s.cast(:cat).to_physical
1690
+ # # =>
1691
+ # # shape: (4,)
1692
+ # # Series: 'values' [u32]
1693
+ # # [
1694
+ # # 0
1695
+ # # null
1696
+ # # 1
1697
+ # # 0
1698
+ # # ]
1699
+ def to_physical
1700
+ super
1701
+ end
1592
1702
 
1593
1703
  # Convert this Series to a Ruby Array. This operation clones data.
1594
1704
  #
@@ -1704,8 +1814,34 @@ module Polars
1704
1814
  # def to_numo
1705
1815
  # end
1706
1816
 
1707
- # def set
1708
- # end
1817
+ # Set masked values.
1818
+ #
1819
+ # @param filter [Series]
1820
+ # Boolean mask.
1821
+ # @param value [Object]
1822
+ # Value with which to replace the masked values.
1823
+ #
1824
+ # @return [Series]
1825
+ #
1826
+ # @note
1827
+ # Use of this function is frequently an anti-pattern, as it can
1828
+ # block optimization (predicate pushdown, etc). Consider using
1829
+ # `Polars.when(predicate).then(value).otherwise(self)` instead.
1830
+ #
1831
+ # @example
1832
+ # s = Polars::Series.new("a", [1, 2, 3])
1833
+ # s.set(s == 2, 10)
1834
+ # # =>
1835
+ # # shape: (3,)
1836
+ # # Series: 'a' [i64]
1837
+ # # [
1838
+ # # 1
1839
+ # # 10
1840
+ # # 3
1841
+ # # ]
1842
+ def set(filter, value)
1843
+ Utils.wrap_s(_s.send("set_with_mask_#{dtype}", filter._s, value))
1844
+ end
1709
1845
 
1710
1846
  # Set values at the index locations.
1711
1847
  #
@@ -1909,8 +2045,28 @@ module Polars
1909
2045
  super
1910
2046
  end
1911
2047
 
1912
- # def dot
1913
- # end
2048
+ # Compute the dot/inner product between two Series.
2049
+ #
2050
+ # @param other [Object]
2051
+ # Series (or array) to compute dot product with.
2052
+ #
2053
+ # @return [Numeric]
2054
+ #
2055
+ # @example
2056
+ # s = Polars::Series.new("a", [1, 2, 3])
2057
+ # s2 = Polars::Series.new("b", [4.0, 5.0, 6.0])
2058
+ # s.dot(s2)
2059
+ # # => 32.0
2060
+ def dot(other)
2061
+ if !other.is_a?(Series)
2062
+ other = Series.new(other)
2063
+ end
2064
+ if len != other.len
2065
+ n, m = len, other.len
2066
+ raise ArgumentError, "Series length mismatch: expected #{n}, found #{m}"
2067
+ end
2068
+ _s.dot(other._s)
2069
+ end
1914
2070
 
1915
2071
  # Compute the most occurring value(s).
1916
2072
  #
@@ -2185,8 +2341,41 @@ module Polars
2185
2341
  super
2186
2342
  end
2187
2343
 
2188
- # def apply
2189
- # end
2344
+ # Apply a custom/user-defined function (UDF) over elements in this Series and
2345
+ # return a new Series.
2346
+ #
2347
+ # If the function returns another datatype, the return_dtype arg should be set,
2348
+ # otherwise the method will fail.
2349
+ #
2350
+ # @param return_dtype [Symbol]
2351
+ # Output datatype. If none is given, the same datatype as this Series will be
2352
+ # used.
2353
+ # @param skip_nulls [Boolean]
2354
+ # Nulls will be skipped and not passed to the Ruby function.
2355
+ # This is faster because Ruby can be skipped and because we call
2356
+ # more specialized functions.
2357
+ #
2358
+ # @return [Series]
2359
+ #
2360
+ # @example
2361
+ # s = Polars::Series.new("a", [1, 2, 3])
2362
+ # s.apply { |x| x + 10 }
2363
+ # # =>
2364
+ # # shape: (3,)
2365
+ # # Series: 'a' [i64]
2366
+ # # [
2367
+ # # 11
2368
+ # # 12
2369
+ # # 13
2370
+ # # ]
2371
+ def apply(return_dtype: nil, skip_nulls: true, &func)
2372
+ if return_dtype.nil?
2373
+ pl_return_dtype = nil
2374
+ else
2375
+ pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
2376
+ end
2377
+ Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
2378
+ end
2190
2379
 
2191
2380
  # Shift the values by a given period.
2192
2381
  #
@@ -2277,38 +2466,509 @@ module Polars
2277
2466
  Utils.wrap_s(_s.zip_with(mask._s, other._s))
2278
2467
  end
2279
2468
 
2280
- # def rolling_min
2281
- # end
2469
+ # Apply a rolling min (moving min) over the values in this array.
2470
+ #
2471
+ # A window of length `window_size` will traverse the array. The values that fill
2472
+ # this window will (optionally) be multiplied with the weights given by the
2473
+ # `weights` vector. The resulting values will be aggregated to their minimum.
2474
+ #
2475
+ # @param window_size [Integer]
2476
+ # The length of the window.
2477
+ # @param weights [Array]
2478
+ # An optional slice with the same length as the window that will be multiplied
2479
+ # elementwise with the values in the window.
2480
+ # @param min_periods [Integer]
2481
+ # The number of values in the window that should be non-null before computing
2482
+ # a result. If nil, it will be set equal to window size.
2483
+ # @param center [Boolean]
2484
+ # Set the labels at the center of the window
2485
+ #
2486
+ # @return [Series]
2487
+ #
2488
+ # @example
2489
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
2490
+ # s.rolling_min(3)
2491
+ # # =>
2492
+ # # shape: (5,)
2493
+ # # Series: 'a' [i64]
2494
+ # # [
2495
+ # # null
2496
+ # # null
2497
+ # # 100
2498
+ # # 200
2499
+ # # 300
2500
+ # # ]
2501
+ def rolling_min(
2502
+ window_size,
2503
+ weights: nil,
2504
+ min_periods: nil,
2505
+ center: false
2506
+ )
2507
+ to_frame
2508
+ .select(
2509
+ Polars.col(name).rolling_min(
2510
+ window_size,
2511
+ weights: weights,
2512
+ min_periods: min_periods,
2513
+ center: center
2514
+ )
2515
+ )
2516
+ .to_series
2517
+ end
2282
2518
 
2283
- # def rolling_max
2284
- # end
2519
+ # Apply a rolling max (moving max) over the values in this array.
2520
+ #
2521
+ # A window of length `window_size` will traverse the array. The values that fill
2522
+ # this window will (optionally) be multiplied with the weights given by the
2523
+ # `weights` vector. The resulting values will be aggregated to their maximum.
2524
+ #
2525
+ # @param window_size [Integer]
2526
+ # The length of the window.
2527
+ # @param weights [Array]
2528
+ # An optional slice with the same length as the window that will be multiplied
2529
+ # elementwise with the values in the window.
2530
+ # @param min_periods [Integer]
2531
+ # The number of values in the window that should be non-null before computing
2532
+ # a result. If nil, it will be set equal to window size.
2533
+ # @param center [Boolean]
2534
+ # Set the labels at the center of the window
2535
+ #
2536
+ # @return [Series]
2537
+ #
2538
+ # @example
2539
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
2540
+ # s.rolling_max(2)
2541
+ # # =>
2542
+ # # shape: (5,)
2543
+ # # Series: 'a' [i64]
2544
+ # # [
2545
+ # # null
2546
+ # # 200
2547
+ # # 300
2548
+ # # 400
2549
+ # # 500
2550
+ # # ]
2551
+ def rolling_max(
2552
+ window_size,
2553
+ weights: nil,
2554
+ min_periods: nil,
2555
+ center: false
2556
+ )
2557
+ to_frame
2558
+ .select(
2559
+ Polars.col(name).rolling_max(
2560
+ window_size,
2561
+ weights: weights,
2562
+ min_periods: min_periods,
2563
+ center: center
2564
+ )
2565
+ )
2566
+ .to_series
2567
+ end
2285
2568
 
2286
- # def rolling_mean
2287
- # end
2569
+ # Apply a rolling mean (moving mean) over the values in this array.
2570
+ #
2571
+ # A window of length `window_size` will traverse the array. The values that fill
2572
+ # this window will (optionally) be multiplied with the weights given by the
2573
+ # `weights` vector. The resulting values will be aggregated to their mean.
2574
+ #
2575
+ # @param window_size [Integer]
2576
+ # The length of the window.
2577
+ # @param weights [Array]
2578
+ # An optional slice with the same length as the window that will be multiplied
2579
+ # elementwise with the values in the window.
2580
+ # @param min_periods [Integer]
2581
+ # The number of values in the window that should be non-null before computing
2582
+ # a result. If nil, it will be set equal to window size.
2583
+ # @param center [Boolean]
2584
+ # Set the labels at the center of the window
2585
+ #
2586
+ # @return [Series]
2587
+ #
2588
+ # @example
2589
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
2590
+ # s.rolling_mean(2)
2591
+ # # =>
2592
+ # # shape: (5,)
2593
+ # # Series: 'a' [f64]
2594
+ # # [
2595
+ # # null
2596
+ # # 150.0
2597
+ # # 250.0
2598
+ # # 350.0
2599
+ # # 450.0
2600
+ # # ]
2601
+ def rolling_mean(
2602
+ window_size,
2603
+ weights: nil,
2604
+ min_periods: nil,
2605
+ center: false
2606
+ )
2607
+ to_frame
2608
+ .select(
2609
+ Polars.col(name).rolling_mean(
2610
+ window_size,
2611
+ weights: weights,
2612
+ min_periods: min_periods,
2613
+ center: center
2614
+ )
2615
+ )
2616
+ .to_series
2617
+ end
2288
2618
 
2289
- # def rolling_sum
2290
- # end
2619
+ # Apply a rolling sum (moving sum) over the values in this array.
2620
+ #
2621
+ # A window of length `window_size` will traverse the array. The values that fill
2622
+ # this window will (optionally) be multiplied with the weights given by the
2623
+ # `weights` vector. The resulting values will be aggregated to their sum.
2624
+ #
2625
+ # @param window_size [Integer]
2626
+ # The length of the window.
2627
+ # @param weights [Array]
2628
+ # An optional slice with the same length as the window that will be multiplied
2629
+ # elementwise with the values in the window.
2630
+ # @param min_periods [Integer]
2631
+ # The number of values in the window that should be non-null before computing
2632
+ # a result. If nil, it will be set equal to window size.
2633
+ # @param center [Boolean]
2634
+ # Set the labels at the center of the window
2635
+ #
2636
+ # @return [Series]
2637
+ #
2638
+ # @example
2639
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
2640
+ # s.rolling_sum(2)
2641
+ # # =>
2642
+ # # shape: (5,)
2643
+ # # Series: 'a' [i64]
2644
+ # # [
2645
+ # # null
2646
+ # # 3
2647
+ # # 5
2648
+ # # 7
2649
+ # # 9
2650
+ # # ]
2651
+ def rolling_sum(
2652
+ window_size,
2653
+ weights: nil,
2654
+ min_periods: nil,
2655
+ center: false
2656
+ )
2657
+ to_frame
2658
+ .select(
2659
+ Polars.col(name).rolling_sum(
2660
+ window_size,
2661
+ weights: weights,
2662
+ min_periods: min_periods,
2663
+ center: center
2664
+ )
2665
+ )
2666
+ .to_series
2667
+ end
2291
2668
 
2292
- # def rolling_std
2293
- # end
2669
+ # Compute a rolling std dev.
2670
+ #
2671
+ # A window of length `window_size` will traverse the array. The values that fill
2672
+ # this window will (optionally) be multiplied with the weights given by the
2673
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
2674
+ #
2675
+ # @param window_size [Integer]
2676
+ # The length of the window.
2677
+ # @param weights [Array]
2678
+ # An optional slice with the same length as the window that will be multiplied
2679
+ # elementwise with the values in the window.
2680
+ # @param min_periods [Integer]
2681
+ # The number of values in the window that should be non-null before computing
2682
+ # a result. If nil, it will be set equal to window size.
2683
+ # @param center [Boolean]
2684
+ # Set the labels at the center of the window
2685
+ #
2686
+ # @return [Series]
2687
+ #
2688
+ # @example
2689
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
2690
+ # s.rolling_std(3)
2691
+ # # =>
2692
+ # # shape: (6,)
2693
+ # # Series: 'a' [f64]
2694
+ # # [
2695
+ # # null
2696
+ # # null
2697
+ # # 1.0
2698
+ # # 1.0
2699
+ # # 1.527525
2700
+ # # 2.0
2701
+ # # ]
2702
+ def rolling_std(
2703
+ window_size,
2704
+ weights: nil,
2705
+ min_periods: nil,
2706
+ center: false
2707
+ )
2708
+ to_frame
2709
+ .select(
2710
+ Polars.col(name).rolling_std(
2711
+ window_size,
2712
+ weights: weights,
2713
+ min_periods: min_periods,
2714
+ center: center
2715
+ )
2716
+ )
2717
+ .to_series
2718
+ end
2294
2719
 
2295
- # def rolling_var
2296
- # end
2720
+ # Compute a rolling variance.
2721
+ #
2722
+ # A window of length `window_size` will traverse the array. The values that fill
2723
+ # this window will (optionally) be multiplied with the weights given by the
2724
+ # `weights` vector. The resulting values will be aggregated to their variance.
2725
+ #
2726
+ # @param window_size [Integer]
2727
+ # The length of the window.
2728
+ # @param weights [Array]
2729
+ # An optional slice with the same length as the window that will be multiplied
2730
+ # elementwise with the values in the window.
2731
+ # @param min_periods [Integer]
2732
+ # The number of values in the window that should be non-null before computing
2733
+ # a result. If nil, it will be set equal to window size.
2734
+ # @param center [Boolean]
2735
+ # Set the labels at the center of the window
2736
+ #
2737
+ # @return [Series]
2738
+ #
2739
+ # @example
2740
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
2741
+ # s.rolling_var(3)
2742
+ # # =>
2743
+ # # shape: (6,)
2744
+ # # Series: 'a' [f64]
2745
+ # # [
2746
+ # # null
2747
+ # # null
2748
+ # # 1.0
2749
+ # # 1.0
2750
+ # # 2.333333
2751
+ # # 4.0
2752
+ # # ]
2753
+ def rolling_var(
2754
+ window_size,
2755
+ weights: nil,
2756
+ min_periods: nil,
2757
+ center: false
2758
+ )
2759
+ to_frame
2760
+ .select(
2761
+ Polars.col(name).rolling_var(
2762
+ window_size,
2763
+ weights: weights,
2764
+ min_periods: min_periods,
2765
+ center: center
2766
+ )
2767
+ )
2768
+ .to_series
2769
+ end
2297
2770
 
2298
2771
  # def rolling_apply
2299
2772
  # end
2300
2773
 
2301
- # def rolling_median
2302
- # end
2774
+ # Compute a rolling median.
2775
+ #
2776
+ # @param window_size [Integer]
2777
+ # The length of the window.
2778
+ # @param weights [Array]
2779
+ # An optional slice with the same length as the window that will be multiplied
2780
+ # elementwise with the values in the window.
2781
+ # @param min_periods [Integer]
2782
+ # The number of values in the window that should be non-null before computing
2783
+ # a result. If nil, it will be set equal to window size.
2784
+ # @param center [Boolean]
2785
+ # Set the labels at the center of the window
2786
+ #
2787
+ # @return [Series]
2788
+ #
2789
+ # @example
2790
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
2791
+ # s.rolling_median(3)
2792
+ # # =>
2793
+ # # shape: (6,)
2794
+ # # Series: 'a' [f64]
2795
+ # # [
2796
+ # # null
2797
+ # # null
2798
+ # # 2.0
2799
+ # # 3.0
2800
+ # # 4.0
2801
+ # # 6.0
2802
+ # # ]
2803
+ def rolling_median(
2804
+ window_size,
2805
+ weights: nil,
2806
+ min_periods: nil,
2807
+ center: false
2808
+ )
2809
+ if min_periods.nil?
2810
+ min_periods = window_size
2811
+ end
2303
2812
 
2304
- # def rolling_quantile
2305
- # end
2813
+ to_frame
2814
+ .select(
2815
+ Polars.col(name).rolling_median(
2816
+ window_size,
2817
+ weights: weights,
2818
+ min_periods: min_periods,
2819
+ center: center
2820
+ )
2821
+ )
2822
+ .to_series
2823
+ end
2306
2824
 
2307
- # def rolling_skew
2308
- # end
2825
+ # Compute a rolling quantile.
2826
+ #
2827
+ # @param quantile [Float]
2828
+ # Quantile between 0.0 and 1.0.
2829
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
2830
+ # Interpolation method.
2831
+ # @param window_size [Integer]
2832
+ # The length of the window.
2833
+ # @param weights [Array]
2834
+ # An optional slice with the same length as the window that will be multiplied
2835
+ # elementwise with the values in the window.
2836
+ # @param min_periods [Integer]
2837
+ # The number of values in the window that should be non-null before computing
2838
+ # a result. If nil, it will be set equal to window size.
2839
+ # @param center [Boolean]
2840
+ # Set the labels at the center of the window
2841
+ #
2842
+ # @return [Series]
2843
+ #
2844
+ # @example
2845
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
2846
+ # s.rolling_quantile(0.33, window_size: 3)
2847
+ # # =>
2848
+ # # shape: (6,)
2849
+ # # Series: 'a' [f64]
2850
+ # # [
2851
+ # # null
2852
+ # # null
2853
+ # # 1.0
2854
+ # # 2.0
2855
+ # # 3.0
2856
+ # # 4.0
2857
+ # # ]
2858
+ #
2859
+ # @example
2860
+ # s.rolling_quantile(0.33, interpolation: "linear", window_size: 3)
2861
+ # # =>
2862
+ # # shape: (6,)
2863
+ # # Series: 'a' [f64]
2864
+ # # [
2865
+ # # null
2866
+ # # null
2867
+ # # 1.66
2868
+ # # 2.66
2869
+ # # 3.66
2870
+ # # 5.32
2871
+ # # ]
2872
+ def rolling_quantile(
2873
+ quantile,
2874
+ interpolation: "nearest",
2875
+ window_size: 2,
2876
+ weights: nil,
2877
+ min_periods: nil,
2878
+ center: false
2879
+ )
2880
+ if min_periods.nil?
2881
+ min_periods = window_size
2882
+ end
2309
2883
 
2310
- # def sample
2311
- # end
2884
+ to_frame
2885
+ .select(
2886
+ Polars.col(name).rolling_quantile(
2887
+ quantile,
2888
+ interpolation: interpolation,
2889
+ window_size: window_size,
2890
+ weights: weights,
2891
+ min_periods: min_periods,
2892
+ center: center
2893
+ )
2894
+ )
2895
+ .to_series
2896
+ end
2897
+
2898
+ # Compute a rolling skew.
2899
+ #
2900
+ # @param window_size [Integer]
2901
+ # Integer size of the rolling window.
2902
+ # @param bias [Boolean]
2903
+ # If false, the calculations are corrected for statistical bias.
2904
+ #
2905
+ # @return [Series]
2906
+ #
2907
+ # @example
2908
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
2909
+ # s.rolling_skew(3)
2910
+ # # =>
2911
+ # # shape: (6,)
2912
+ # # Series: 'a' [f64]
2913
+ # # [
2914
+ # # null
2915
+ # # null
2916
+ # # 0.0
2917
+ # # 0.0
2918
+ # # 0.381802
2919
+ # # 0.0
2920
+ # # ]
2921
+ def rolling_skew(window_size, bias: true)
2922
+ super
2923
+ end
2924
+
2925
+ # Sample from this Series.
2926
+ #
2927
+ # @param n [Integer]
2928
+ # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
2929
+ # `frac` is nil.
2930
+ # @param frac [Float]
2931
+ # Fraction of items to return. Cannot be used with `n`.
2932
+ # @param with_replacement [Boolean]
2933
+ # Allow values to be sampled more than once.
2934
+ # @param shuffle [Boolean]
2935
+ # Shuffle the order of sampled data points.
2936
+ # @param seed [Integer]
2937
+ # Seed for the random number generator. If set to nil (default), a random
2938
+ # seed is used.
2939
+ #
2940
+ # @return [Series]
2941
+ #
2942
+ # @example
2943
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
2944
+ # s.sample(n: 2, seed: 0)
2945
+ # # =>
2946
+ # # shape: (2,)
2947
+ # # Series: 'a' [i64]
2948
+ # # [
2949
+ # # 1
2950
+ # # 5
2951
+ # # ]
2952
+ def sample(
2953
+ n: nil,
2954
+ frac: nil,
2955
+ with_replacement: false,
2956
+ shuffle: false,
2957
+ seed: nil
2958
+ )
2959
+ if !n.nil? && !frac.nil?
2960
+ raise ArgumentError, "cannot specify both `n` and `frac`"
2961
+ end
2962
+
2963
+ if n.nil? && !frac.nil?
2964
+ return Utils.wrap_s(_s.sample_frac(frac, with_replacement, shuffle, seed))
2965
+ end
2966
+
2967
+ if n.nil?
2968
+ n = 1
2969
+ end
2970
+ Utils.wrap_s(_s.sample_n(n, with_replacement, shuffle, seed))
2971
+ end
2312
2972
 
2313
2973
  # Get a boolean mask of the local maximum peaks.
2314
2974
  #
@@ -2381,8 +3041,35 @@ module Polars
2381
3041
  end
2382
3042
  end
2383
3043
 
2384
- # def _hash
2385
- # end
3044
+ # Hash the Series.
3045
+ #
3046
+ # The hash value is of type `:u64`.
3047
+ #
3048
+ # @param seed [Integer]
3049
+ # Random seed parameter. Defaults to 0.
3050
+ # @param seed_1 [Integer]
3051
+ # Random seed parameter. Defaults to `seed` if not set.
3052
+ # @param seed_2 [Integer]
3053
+ # Random seed parameter. Defaults to `seed` if not set.
3054
+ # @param seed_3 [Integer]
3055
+ # Random seed parameter. Defaults to `seed` if not set.
3056
+ #
3057
+ # @return [Series]
3058
+ #
3059
+ # @example
3060
+ # s = Polars::Series.new("a", [1, 2, 3])
3061
+ # s._hash(42)
3062
+ # # =>
3063
+ # # shape: (3,)
3064
+ # # Series: 'a' [u64]
3065
+ # # [
3066
+ # # 2374023516666777365
3067
+ # # 10386026231460783898
3068
+ # # 17796317186427479491
3069
+ # # ]
3070
+ def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
3071
+ super
3072
+ end
2386
3073
 
2387
3074
  # Reinterpret the underlying bits as a signed/unsigned integer.
2388
3075
  #
@@ -2425,8 +3112,61 @@ module Polars
2425
3112
  super
2426
3113
  end
2427
3114
 
2428
- # def rank
2429
- # end
3115
+ # Assign ranks to data, dealing with ties appropriately.
3116
+ #
3117
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
3118
+ # The method used to assign ranks to tied elements.
3119
+ # The following methods are available (default is 'average'):
3120
+ #
3121
+ # - 'average' : The average of the ranks that would have been assigned to
3122
+ # all the tied values is assigned to each value.
3123
+ # - 'min' : The minimum of the ranks that would have been assigned to all
3124
+ # the tied values is assigned to each value. (This is also referred to
3125
+ # as "competition" ranking.)
3126
+ # - 'max' : The maximum of the ranks that would have been assigned to all
3127
+ # the tied values is assigned to each value.
3128
+ # - 'dense' : Like 'min', but the rank of the next highest element is
3129
+ # assigned the rank immediately after those assigned to the tied
3130
+ # elements.
3131
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
3132
+ # the order that the values occur in the Series.
3133
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
3134
+ # on the order that the values occur in the Series.
3135
+ # @param reverse [Boolean]
3136
+ # Reverse the operation.
3137
+ #
3138
+ # @return [Series]
3139
+ #
3140
+ # @example The 'average' method:
3141
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3142
+ # s.rank
3143
+ # # =>
3144
+ # # shape: (5,)
3145
+ # # Series: 'a' [f32]
3146
+ # # [
3147
+ # # 3.0
3148
+ # # 4.5
3149
+ # # 1.5
3150
+ # # 1.5
3151
+ # # 4.5
3152
+ # # ]
3153
+ #
3154
+ # @example The 'ordinal' method:
3155
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3156
+ # s.rank(method: "ordinal")
3157
+ # # =>
3158
+ # # shape: (5,)
3159
+ # # Series: 'a' [u32]
3160
+ # # [
3161
+ # # 3
3162
+ # # 4
3163
+ # # 1
3164
+ # # 2
3165
+ # # 5
3166
+ # # ]
3167
+ def rank(method: "average", reverse: false)
3168
+ super
3169
+ end
2430
3170
 
2431
3171
  # Calculate the n-th discrete difference.
2432
3172
  #
@@ -2440,8 +3180,56 @@ module Polars
2440
3180
  super
2441
3181
  end
2442
3182
 
2443
- # def pct_change
2444
- # end
3183
+ # Computes percentage change between values.
3184
+ #
3185
+ # Percentage change (as fraction) between current element and most-recent
3186
+ # non-null element at least `n` period(s) before the current element.
3187
+ #
3188
+ # Computes the change from the previous row by default.
3189
+ #
3190
+ # @param n [Integer]
3191
+ # periods to shift for forming percent change.
3192
+ #
3193
+ # @return [Series]
3194
+ #
3195
+ # @example
3196
+ # Polars::Series.new(0..9).pct_change
3197
+ # # =>
3198
+ # # shape: (10,)
3199
+ # # Series: '' [f64]
3200
+ # # [
3201
+ # # null
3202
+ # # inf
3203
+ # # 1.0
3204
+ # # 0.5
3205
+ # # 0.333333
3206
+ # # 0.25
3207
+ # # 0.2
3208
+ # # 0.166667
3209
+ # # 0.142857
3210
+ # # 0.125
3211
+ # # ]
3212
+ #
3213
+ # @example
3214
+ # Polars::Series.new([1, 2, 4, 8, 16, 32, 64, 128, 256, 512]).pct_change(n: 2)
3215
+ # # =>
3216
+ # # shape: (10,)
3217
+ # # Series: '' [f64]
3218
+ # # [
3219
+ # # null
3220
+ # # null
3221
+ # # 3.0
3222
+ # # 3.0
3223
+ # # 3.0
3224
+ # # 3.0
3225
+ # # 3.0
3226
+ # # 3.0
3227
+ # # 3.0
3228
+ # # 3.0
3229
+ # # ]
3230
+ def pct_change(n: 1)
3231
+ super
3232
+ end
2445
3233
 
2446
3234
  # Compute the sample skewness of a data set.
2447
3235
  #
@@ -2571,14 +3359,49 @@ module Polars
2571
3359
  super
2572
3360
  end
2573
3361
 
2574
- # def ewm_mean
2575
- # end
3362
+ # Exponentially-weighted moving average.
3363
+ #
3364
+ # @return [Series]
3365
+ def ewm_mean(
3366
+ com: nil,
3367
+ span: nil,
3368
+ half_life: nil,
3369
+ alpha: nil,
3370
+ adjust: true,
3371
+ min_periods: 1
3372
+ )
3373
+ super
3374
+ end
2576
3375
 
2577
- # def ewm_std
2578
- # end
3376
+ # Exponentially-weighted moving standard deviation.
3377
+ #
3378
+ # @return [Series]
3379
+ def ewm_std(
3380
+ com: nil,
3381
+ span: nil,
3382
+ half_life: nil,
3383
+ alpha: nil,
3384
+ adjust: true,
3385
+ bias: false,
3386
+ min_periods: 1
3387
+ )
3388
+ super
3389
+ end
2579
3390
 
2580
- # def ewm_var
2581
- # end
3391
+ # Exponentially-weighted moving variance.
3392
+ #
3393
+ # @return [Series]
3394
+ def ewm_var(
3395
+ com: nil,
3396
+ span: nil,
3397
+ half_life: nil,
3398
+ alpha: nil,
3399
+ adjust: true,
3400
+ bias: false,
3401
+ min_periods: 1
3402
+ )
3403
+ super
3404
+ end
2582
3405
 
2583
3406
  # Extend the Series with given number of values.
2584
3407
  #
@@ -2645,20 +3468,40 @@ module Polars
2645
3468
  super
2646
3469
  end
2647
3470
 
2648
- # def arr
2649
- # end
3471
+ # Create an object namespace of all list related methods.
3472
+ #
3473
+ # @return [ListNameSpace]
3474
+ def arr
3475
+ ListNameSpace.new(self)
3476
+ end
2650
3477
 
2651
- # def cat
2652
- # end
3478
+ # Create an object namespace of all categorical related methods.
3479
+ #
3480
+ # @return [CatNameSpace]
3481
+ def cat
3482
+ CatNameSpace.new(self)
3483
+ end
2653
3484
 
2654
- # def dt
2655
- # end
3485
+ # Create an object namespace of all datetime related methods.
3486
+ #
3487
+ # @return [DateTimeNameSpace]
3488
+ def dt
3489
+ DateTimeNameSpace.new(self)
3490
+ end
2656
3491
 
2657
- # def str
2658
- # end
3492
+ # Create an object namespace of all string related methods.
3493
+ #
3494
+ # @return [StringNameSpace]
3495
+ def str
3496
+ StringNameSpace.new(self)
3497
+ end
2659
3498
 
2660
- # def struct
2661
- # end
3499
+ # Create an object namespace of all struct related methods.
3500
+ #
3501
+ # @return [StructNameSpace]
3502
+ def struct
3503
+ StructNameSpace.new(self)
3504
+ end
2662
3505
 
2663
3506
  private
2664
3507
 
@@ -2696,7 +3539,14 @@ module Polars
2696
3539
  return Utils.wrap_s(_s.send(op, other._s))
2697
3540
  end
2698
3541
 
2699
- raise Todo
3542
+ if other.is_a?(Date) || other.is_a?(DateTime) || other.is_a?(Time) || other.is_a?(String)
3543
+ raise Todo
3544
+ end
3545
+ if other.is_a?(Float) && !is_float
3546
+ raise Todo
3547
+ end
3548
+
3549
+ Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
2700
3550
  end
2701
3551
 
2702
3552
  def series_to_rbseries(name, values)
@@ -2751,6 +3601,10 @@ module Polars
2751
3601
 
2752
3602
  if ruby_dtype == Date
2753
3603
  RbSeries.new_opt_date(name, values, strict)
3604
+ elsif ruby_dtype == Time
3605
+ RbSeries.new_opt_datetime(name, values, strict)
3606
+ elsif ruby_dtype == DateTime
3607
+ RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
2754
3608
  else
2755
3609
  raise Todo
2756
3610
  end
@@ -2764,7 +3618,26 @@ module Polars
2764
3618
  raise Todo
2765
3619
  end
2766
3620
 
2767
- raise Todo
3621
+ if value.is_a?(Array)
3622
+ count = 0
3623
+ equal_to_inner = true
3624
+ values.each do |lst|
3625
+ lst.each do |vl|
3626
+ equal_to_inner = vl.class == nested_dtype
3627
+ if !equal_to_inner || count > 50
3628
+ break
3629
+ end
3630
+ count += 1
3631
+ end
3632
+ end
3633
+ if equal_to_inner
3634
+ dtype = Utils.rb_type_to_dtype(nested_dtype)
3635
+ # TODO rescue and fallback to new_object
3636
+ return RbSeries.new_list(name, values, dtype)
3637
+ end
3638
+ end
3639
+
3640
+ RbSeries.new_object(name, values, strict)
2768
3641
  else
2769
3642
  constructor = rb_type_to_constructor(value.class)
2770
3643
  constructor.call(name, values, strict)
@@ -2804,8 +3677,7 @@ module Polars
2804
3677
  def rb_type_to_constructor(dtype)
2805
3678
  RB_TYPE_TO_CONSTRUCTOR.fetch(dtype)
2806
3679
  rescue KeyError
2807
- # RbSeries.method(:new_object)
2808
- raise ArgumentError, "Cannot determine type"
3680
+ RbSeries.method(:new_object)
2809
3681
  end
2810
3682
 
2811
3683
  def _get_first_non_none(values)