polars-df 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/Cargo.lock +142 -11
  4. data/Cargo.toml +5 -0
  5. data/ext/polars/Cargo.toml +17 -1
  6. data/ext/polars/src/apply/dataframe.rs +292 -0
  7. data/ext/polars/src/apply/mod.rs +254 -0
  8. data/ext/polars/src/apply/series.rs +1173 -0
  9. data/ext/polars/src/conversion.rs +180 -5
  10. data/ext/polars/src/dataframe.rs +146 -1
  11. data/ext/polars/src/error.rs +12 -0
  12. data/ext/polars/src/lazy/apply.rs +34 -2
  13. data/ext/polars/src/lazy/dataframe.rs +74 -3
  14. data/ext/polars/src/lazy/dsl.rs +136 -0
  15. data/ext/polars/src/lib.rs +199 -1
  16. data/ext/polars/src/list_construction.rs +100 -0
  17. data/ext/polars/src/series.rs +331 -0
  18. data/ext/polars/src/utils.rs +25 -0
  19. data/lib/polars/cat_name_space.rb +54 -0
  20. data/lib/polars/convert.rb +100 -0
  21. data/lib/polars/data_frame.rb +1558 -60
  22. data/lib/polars/date_time_expr.rb +2 -2
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/dynamic_group_by.rb +49 -0
  25. data/lib/polars/expr.rb +4072 -107
  26. data/lib/polars/expr_dispatch.rb +8 -0
  27. data/lib/polars/functions.rb +192 -3
  28. data/lib/polars/group_by.rb +44 -3
  29. data/lib/polars/io.rb +20 -4
  30. data/lib/polars/lazy_frame.rb +800 -26
  31. data/lib/polars/lazy_functions.rb +687 -43
  32. data/lib/polars/lazy_group_by.rb +1 -0
  33. data/lib/polars/list_expr.rb +502 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/rolling_group_by.rb +35 -0
  36. data/lib/polars/series.rb +934 -62
  37. data/lib/polars/string_expr.rb +189 -13
  38. data/lib/polars/string_name_space.rb +690 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +44 -0
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +14 -1
  43. metadata +15 -3
data/lib/polars/series.rb CHANGED
@@ -3,9 +3,6 @@ module Polars
3
3
  class Series
4
4
  include ExprDispatch
5
5
 
6
- # @private
7
- attr_accessor :_s
8
-
9
6
  # Create a new Series.
10
7
  #
11
8
  # @param name [String, Array, nil]
@@ -26,7 +23,7 @@ module Polars
26
23
  # @example Constructing a Series by specifying name and values positionally:
27
24
  # s = Polars::Series.new("a", [1, 2, 3])
28
25
  #
29
- # @example Notice that the dtype is automatically inferred as a polars Int64:
26
+ # @example Notice that the dtype is automatically inferred as a polars `:i64`:
30
27
  # s.dtype
31
28
  # # => :i64
32
29
  #
@@ -48,6 +45,11 @@ module Polars
48
45
 
49
46
  name = "" if name.nil?
50
47
 
48
+ # TODO improve
49
+ if values.is_a?(Range) && values.begin.is_a?(String)
50
+ values = values.to_a
51
+ end
52
+
51
53
  if values.nil?
52
54
  self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
53
55
  elsif values.is_a?(Series)
@@ -730,8 +732,43 @@ module Polars
730
732
  Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
731
733
  end
732
734
 
733
- # def cumulative_eval
734
- # end
735
+ # Run an expression over a sliding window that increases `1` slot every iteration.
736
+ #
737
+ # @param expr [Expr]
738
+ # Expression to evaluate
739
+ # @param min_periods [Integer]
740
+ # Number of valid values there should be in the window before the expression
741
+ # is evaluated. valid values = `length - null_count`
742
+ # @param parallel [Boolean]
743
+ # Run in parallel. Don't do this in a groupby or another operation that
744
+ # already has much parallelization.
745
+ #
746
+ # @return [Series]
747
+ #
748
+ # @note
749
+ # This functionality is experimental and may change without it being considered a
750
+ # breaking change.
751
+ #
752
+ # @note
753
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
754
+ # for operations that visit all elements.
755
+ #
756
+ # @example
757
+ # s = Polars::Series.new("values", [1, 2, 3, 4, 5])
758
+ # s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
759
+ # # =>
760
+ # # shape: (5,)
761
+ # # Series: 'values' [f64]
762
+ # # [
763
+ # # 0.0
764
+ # # -3.0
765
+ # # -8.0
766
+ # # -15.0
767
+ # # -24.0
768
+ # # ]
769
+ def cumulative_eval(expr, min_periods: 1, parallel: false)
770
+ super
771
+ end
735
772
 
736
773
  # Return a copy of the Series with a new alias/name.
737
774
  #
@@ -1436,8 +1473,57 @@ module Polars
1436
1473
  super
1437
1474
  end
1438
1475
 
1439
- # def is_in
1440
- # end
1476
+ # Check if elements of this Series are in the other Series.
1477
+ #
1478
+ # @return [Series]
1479
+ #
1480
+ # @example
1481
+ # s = Polars::Series.new("a", [1, 2, 3])
1482
+ # s2 = Polars::Series.new("b", [2, 4])
1483
+ # s2.is_in(s)
1484
+ # # =>
1485
+ # # shape: (2,)
1486
+ # # Series: 'b' [bool]
1487
+ # # [
1488
+ # # true
1489
+ # # false
1490
+ # # ]
1491
+ #
1492
+ # @example
1493
+ # sets = Polars::Series.new("sets", [[1, 2, 3], [1, 2], [9, 10]])
1494
+ # # =>
1495
+ # # shape: (3,)
1496
+ # # Series: 'sets' [list]
1497
+ # # [
1498
+ # # [1, 2, 3]
1499
+ # # [1, 2]
1500
+ # # [9, 10]
1501
+ # # ]
1502
+ #
1503
+ # @example
1504
+ # optional_members = Polars::Series.new("optional_members", [1, 2, 3])
1505
+ # # =>
1506
+ # # shape: (3,)
1507
+ # # Series: 'optional_members' [i64]
1508
+ # # [
1509
+ # # 1
1510
+ # # 2
1511
+ # # 3
1512
+ # # ]
1513
+ #
1514
+ # @example
1515
+ # optional_members.is_in(sets)
1516
+ # # =>
1517
+ # # shape: (3,)
1518
+ # # Series: 'optional_members' [bool]
1519
+ # # [
1520
+ # # true
1521
+ # # true
1522
+ # # false
1523
+ # # ]
1524
+ def is_in(other)
1525
+ super
1526
+ end
1441
1527
 
1442
1528
  # Get index values where Boolean Series evaluate `true`.
1443
1529
  #
@@ -1587,8 +1673,32 @@ module Polars
1587
1673
  super
1588
1674
  end
1589
1675
 
1590
- # def to_physical
1591
- # end
1676
+ # Cast to physical representation of the logical dtype.
1677
+ #
1678
+ # - `:date` -> `:i32`
1679
+ # - `:datetime` -> `:i64`
1680
+ # - `:time` -> `:i64`
1681
+ # - `:duration` -> `:i64`
1682
+ # - `:cat` -> `:u32`
1683
+ # - other data types will be left unchanged.
1684
+ #
1685
+ # @return [Series]
1686
+ #
1687
+ # @example
1688
+ # s = Polars::Series.new("values", ["a", nil, "x", "a"])
1689
+ # s.cast(:cat).to_physical
1690
+ # # =>
1691
+ # # shape: (4,)
1692
+ # # Series: 'values' [u32]
1693
+ # # [
1694
+ # # 0
1695
+ # # null
1696
+ # # 1
1697
+ # # 0
1698
+ # # ]
1699
+ def to_physical
1700
+ super
1701
+ end
1592
1702
 
1593
1703
  # Convert this Series to a Ruby Array. This operation clones data.
1594
1704
  #
@@ -1704,8 +1814,34 @@ module Polars
1704
1814
  # def to_numo
1705
1815
  # end
1706
1816
 
1707
- # def set
1708
- # end
1817
+ # Set masked values.
1818
+ #
1819
+ # @param filter [Series]
1820
+ # Boolean mask.
1821
+ # @param value [Object]
1822
+ # Value with which to replace the masked values.
1823
+ #
1824
+ # @return [Series]
1825
+ #
1826
+ # @note
1827
+ # Use of this function is frequently an anti-pattern, as it can
1828
+ # block optimization (predicate pushdown, etc). Consider using
1829
+ # `Polars.when(predicate).then(value).otherwise(self)` instead.
1830
+ #
1831
+ # @example
1832
+ # s = Polars::Series.new("a", [1, 2, 3])
1833
+ # s.set(s == 2, 10)
1834
+ # # =>
1835
+ # # shape: (3,)
1836
+ # # Series: 'a' [i64]
1837
+ # # [
1838
+ # # 1
1839
+ # # 10
1840
+ # # 3
1841
+ # # ]
1842
+ def set(filter, value)
1843
+ Utils.wrap_s(_s.send("set_with_mask_#{dtype}", filter._s, value))
1844
+ end
1709
1845
 
1710
1846
  # Set values at the index locations.
1711
1847
  #
@@ -1909,8 +2045,28 @@ module Polars
1909
2045
  super
1910
2046
  end
1911
2047
 
1912
- # def dot
1913
- # end
2048
+ # Compute the dot/inner product between two Series.
2049
+ #
2050
+ # @param other [Object]
2051
+ # Series (or array) to compute dot product with.
2052
+ #
2053
+ # @return [Numeric]
2054
+ #
2055
+ # @example
2056
+ # s = Polars::Series.new("a", [1, 2, 3])
2057
+ # s2 = Polars::Series.new("b", [4.0, 5.0, 6.0])
2058
+ # s.dot(s2)
2059
+ # # => 32.0
2060
+ def dot(other)
2061
+ if !other.is_a?(Series)
2062
+ other = Series.new(other)
2063
+ end
2064
+ if len != other.len
2065
+ n, m = len, other.len
2066
+ raise ArgumentError, "Series length mismatch: expected #{n}, found #{m}"
2067
+ end
2068
+ _s.dot(other._s)
2069
+ end
1914
2070
 
1915
2071
  # Compute the most occurring value(s).
1916
2072
  #
@@ -2185,8 +2341,41 @@ module Polars
2185
2341
  super
2186
2342
  end
2187
2343
 
2188
- # def apply
2189
- # end
2344
+ # Apply a custom/user-defined function (UDF) over elements in this Series and
2345
+ # return a new Series.
2346
+ #
2347
+ # If the function returns another datatype, the return_dtype arg should be set,
2348
+ # otherwise the method will fail.
2349
+ #
2350
+ # @param return_dtype [Symbol]
2351
+ # Output datatype. If none is given, the same datatype as this Series will be
2352
+ # used.
2353
+ # @param skip_nulls [Boolean]
2354
+ # Nulls will be skipped and not passed to the Ruby function.
2355
+ # This is faster because Ruby can be skipped and because we call
2356
+ # more specialized functions.
2357
+ #
2358
+ # @return [Series]
2359
+ #
2360
+ # @example
2361
+ # s = Polars::Series.new("a", [1, 2, 3])
2362
+ # s.apply { |x| x + 10 }
2363
+ # # =>
2364
+ # # shape: (3,)
2365
+ # # Series: 'a' [i64]
2366
+ # # [
2367
+ # # 11
2368
+ # # 12
2369
+ # # 13
2370
+ # # ]
2371
+ def apply(return_dtype: nil, skip_nulls: true, &func)
2372
+ if return_dtype.nil?
2373
+ pl_return_dtype = nil
2374
+ else
2375
+ pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
2376
+ end
2377
+ Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
2378
+ end
2190
2379
 
2191
2380
  # Shift the values by a given period.
2192
2381
  #
@@ -2277,38 +2466,509 @@ module Polars
2277
2466
  Utils.wrap_s(_s.zip_with(mask._s, other._s))
2278
2467
  end
2279
2468
 
2280
- # def rolling_min
2281
- # end
2469
+ # Apply a rolling min (moving min) over the values in this array.
2470
+ #
2471
+ # A window of length `window_size` will traverse the array. The values that fill
2472
+ # this window will (optionally) be multiplied with the weights given by the
2473
+ # `weights` vector. The resulting values will be aggregated to their minimum.
2474
+ #
2475
+ # @param window_size [Integer]
2476
+ # The length of the window.
2477
+ # @param weights [Array]
2478
+ # An optional slice with the same length as the window that will be multiplied
2479
+ # elementwise with the values in the window.
2480
+ # @param min_periods [Integer]
2481
+ # The number of values in the window that should be non-null before computing
2482
+ # a result. If nil, it will be set equal to window size.
2483
+ # @param center [Boolean]
2484
+ # Set the labels at the center of the window
2485
+ #
2486
+ # @return [Series]
2487
+ #
2488
+ # @example
2489
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
2490
+ # s.rolling_min(3)
2491
+ # # =>
2492
+ # # shape: (5,)
2493
+ # # Series: 'a' [i64]
2494
+ # # [
2495
+ # # null
2496
+ # # null
2497
+ # # 100
2498
+ # # 200
2499
+ # # 300
2500
+ # # ]
2501
+ def rolling_min(
2502
+ window_size,
2503
+ weights: nil,
2504
+ min_periods: nil,
2505
+ center: false
2506
+ )
2507
+ to_frame
2508
+ .select(
2509
+ Polars.col(name).rolling_min(
2510
+ window_size,
2511
+ weights: weights,
2512
+ min_periods: min_periods,
2513
+ center: center
2514
+ )
2515
+ )
2516
+ .to_series
2517
+ end
2282
2518
 
2283
- # def rolling_max
2284
- # end
2519
+ # Apply a rolling max (moving max) over the values in this array.
2520
+ #
2521
+ # A window of length `window_size` will traverse the array. The values that fill
2522
+ # this window will (optionally) be multiplied with the weights given by the
2523
+ # `weights` vector. The resulting values will be aggregated to their maximum.
2524
+ #
2525
+ # @param window_size [Integer]
2526
+ # The length of the window.
2527
+ # @param weights [Array]
2528
+ # An optional slice with the same length as the window that will be multiplied
2529
+ # elementwise with the values in the window.
2530
+ # @param min_periods [Integer]
2531
+ # The number of values in the window that should be non-null before computing
2532
+ # a result. If nil, it will be set equal to window size.
2533
+ # @param center [Boolean]
2534
+ # Set the labels at the center of the window
2535
+ #
2536
+ # @return [Series]
2537
+ #
2538
+ # @example
2539
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
2540
+ # s.rolling_max(2)
2541
+ # # =>
2542
+ # # shape: (5,)
2543
+ # # Series: 'a' [i64]
2544
+ # # [
2545
+ # # null
2546
+ # # 200
2547
+ # # 300
2548
+ # # 400
2549
+ # # 500
2550
+ # # ]
2551
+ def rolling_max(
2552
+ window_size,
2553
+ weights: nil,
2554
+ min_periods: nil,
2555
+ center: false
2556
+ )
2557
+ to_frame
2558
+ .select(
2559
+ Polars.col(name).rolling_max(
2560
+ window_size,
2561
+ weights: weights,
2562
+ min_periods: min_periods,
2563
+ center: center
2564
+ )
2565
+ )
2566
+ .to_series
2567
+ end
2285
2568
 
2286
- # def rolling_mean
2287
- # end
2569
+ # Apply a rolling mean (moving mean) over the values in this array.
2570
+ #
2571
+ # A window of length `window_size` will traverse the array. The values that fill
2572
+ # this window will (optionally) be multiplied with the weights given by the
2573
+ # `weights` vector. The resulting values will be aggregated to their mean.
2574
+ #
2575
+ # @param window_size [Integer]
2576
+ # The length of the window.
2577
+ # @param weights [Array]
2578
+ # An optional slice with the same length as the window that will be multiplied
2579
+ # elementwise with the values in the window.
2580
+ # @param min_periods [Integer]
2581
+ # The number of values in the window that should be non-null before computing
2582
+ # a result. If nil, it will be set equal to window size.
2583
+ # @param center [Boolean]
2584
+ # Set the labels at the center of the window
2585
+ #
2586
+ # @return [Series]
2587
+ #
2588
+ # @example
2589
+ # s = Polars::Series.new("a", [100, 200, 300, 400, 500])
2590
+ # s.rolling_mean(2)
2591
+ # # =>
2592
+ # # shape: (5,)
2593
+ # # Series: 'a' [f64]
2594
+ # # [
2595
+ # # null
2596
+ # # 150.0
2597
+ # # 250.0
2598
+ # # 350.0
2599
+ # # 450.0
2600
+ # # ]
2601
+ def rolling_mean(
2602
+ window_size,
2603
+ weights: nil,
2604
+ min_periods: nil,
2605
+ center: false
2606
+ )
2607
+ to_frame
2608
+ .select(
2609
+ Polars.col(name).rolling_mean(
2610
+ window_size,
2611
+ weights: weights,
2612
+ min_periods: min_periods,
2613
+ center: center
2614
+ )
2615
+ )
2616
+ .to_series
2617
+ end
2288
2618
 
2289
- # def rolling_sum
2290
- # end
2619
+ # Apply a rolling sum (moving sum) over the values in this array.
2620
+ #
2621
+ # A window of length `window_size` will traverse the array. The values that fill
2622
+ # this window will (optionally) be multiplied with the weights given by the
2623
+ # `weights` vector. The resulting values will be aggregated to their sum.
2624
+ #
2625
+ # @param window_size [Integer]
2626
+ # The length of the window.
2627
+ # @param weights [Array]
2628
+ # An optional slice with the same length as the window that will be multiplied
2629
+ # elementwise with the values in the window.
2630
+ # @param min_periods [Integer]
2631
+ # The number of values in the window that should be non-null before computing
2632
+ # a result. If nil, it will be set equal to window size.
2633
+ # @param center [Boolean]
2634
+ # Set the labels at the center of the window
2635
+ #
2636
+ # @return [Series]
2637
+ #
2638
+ # @example
2639
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
2640
+ # s.rolling_sum(2)
2641
+ # # =>
2642
+ # # shape: (5,)
2643
+ # # Series: 'a' [i64]
2644
+ # # [
2645
+ # # null
2646
+ # # 3
2647
+ # # 5
2648
+ # # 7
2649
+ # # 9
2650
+ # # ]
2651
+ def rolling_sum(
2652
+ window_size,
2653
+ weights: nil,
2654
+ min_periods: nil,
2655
+ center: false
2656
+ )
2657
+ to_frame
2658
+ .select(
2659
+ Polars.col(name).rolling_sum(
2660
+ window_size,
2661
+ weights: weights,
2662
+ min_periods: min_periods,
2663
+ center: center
2664
+ )
2665
+ )
2666
+ .to_series
2667
+ end
2291
2668
 
2292
- # def rolling_std
2293
- # end
2669
+ # Compute a rolling std dev.
2670
+ #
2671
+ # A window of length `window_size` will traverse the array. The values that fill
2672
+ # this window will (optionally) be multiplied with the weights given by the
2673
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
2674
+ #
2675
+ # @param window_size [Integer]
2676
+ # The length of the window.
2677
+ # @param weights [Array]
2678
+ # An optional slice with the same length as the window that will be multiplied
2679
+ # elementwise with the values in the window.
2680
+ # @param min_periods [Integer]
2681
+ # The number of values in the window that should be non-null before computing
2682
+ # a result. If nil, it will be set equal to window size.
2683
+ # @param center [Boolean]
2684
+ # Set the labels at the center of the window
2685
+ #
2686
+ # @return [Series]
2687
+ #
2688
+ # @example
2689
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
2690
+ # s.rolling_std(3)
2691
+ # # =>
2692
+ # # shape: (6,)
2693
+ # # Series: 'a' [f64]
2694
+ # # [
2695
+ # # null
2696
+ # # null
2697
+ # # 1.0
2698
+ # # 1.0
2699
+ # # 1.527525
2700
+ # # 2.0
2701
+ # # ]
2702
+ def rolling_std(
2703
+ window_size,
2704
+ weights: nil,
2705
+ min_periods: nil,
2706
+ center: false
2707
+ )
2708
+ to_frame
2709
+ .select(
2710
+ Polars.col(name).rolling_std(
2711
+ window_size,
2712
+ weights: weights,
2713
+ min_periods: min_periods,
2714
+ center: center
2715
+ )
2716
+ )
2717
+ .to_series
2718
+ end
2294
2719
 
2295
- # def rolling_var
2296
- # end
2720
+ # Compute a rolling variance.
2721
+ #
2722
+ # A window of length `window_size` will traverse the array. The values that fill
2723
+ # this window will (optionally) be multiplied with the weights given by the
2724
+ # `weights` vector. The resulting values will be aggregated to their variance.
2725
+ #
2726
+ # @param window_size [Integer]
2727
+ # The length of the window.
2728
+ # @param weights [Array]
2729
+ # An optional slice with the same length as the window that will be multiplied
2730
+ # elementwise with the values in the window.
2731
+ # @param min_periods [Integer]
2732
+ # The number of values in the window that should be non-null before computing
2733
+ # a result. If nil, it will be set equal to window size.
2734
+ # @param center [Boolean]
2735
+ # Set the labels at the center of the window
2736
+ #
2737
+ # @return [Series]
2738
+ #
2739
+ # @example
2740
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
2741
+ # s.rolling_var(3)
2742
+ # # =>
2743
+ # # shape: (6,)
2744
+ # # Series: 'a' [f64]
2745
+ # # [
2746
+ # # null
2747
+ # # null
2748
+ # # 1.0
2749
+ # # 1.0
2750
+ # # 2.333333
2751
+ # # 4.0
2752
+ # # ]
2753
+ def rolling_var(
2754
+ window_size,
2755
+ weights: nil,
2756
+ min_periods: nil,
2757
+ center: false
2758
+ )
2759
+ to_frame
2760
+ .select(
2761
+ Polars.col(name).rolling_var(
2762
+ window_size,
2763
+ weights: weights,
2764
+ min_periods: min_periods,
2765
+ center: center
2766
+ )
2767
+ )
2768
+ .to_series
2769
+ end
2297
2770
 
2298
2771
  # def rolling_apply
2299
2772
  # end
2300
2773
 
2301
- # def rolling_median
2302
- # end
2774
+ # Compute a rolling median.
2775
+ #
2776
+ # @param window_size [Integer]
2777
+ # The length of the window.
2778
+ # @param weights [Array]
2779
+ # An optional slice with the same length as the window that will be multiplied
2780
+ # elementwise with the values in the window.
2781
+ # @param min_periods [Integer]
2782
+ # The number of values in the window that should be non-null before computing
2783
+ # a result. If nil, it will be set equal to window size.
2784
+ # @param center [Boolean]
2785
+ # Set the labels at the center of the window
2786
+ #
2787
+ # @return [Series]
2788
+ #
2789
+ # @example
2790
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
2791
+ # s.rolling_median(3)
2792
+ # # =>
2793
+ # # shape: (6,)
2794
+ # # Series: 'a' [f64]
2795
+ # # [
2796
+ # # null
2797
+ # # null
2798
+ # # 2.0
2799
+ # # 3.0
2800
+ # # 4.0
2801
+ # # 6.0
2802
+ # # ]
2803
+ def rolling_median(
2804
+ window_size,
2805
+ weights: nil,
2806
+ min_periods: nil,
2807
+ center: false
2808
+ )
2809
+ if min_periods.nil?
2810
+ min_periods = window_size
2811
+ end
2303
2812
 
2304
- # def rolling_quantile
2305
- # end
2813
+ to_frame
2814
+ .select(
2815
+ Polars.col(name).rolling_median(
2816
+ window_size,
2817
+ weights: weights,
2818
+ min_periods: min_periods,
2819
+ center: center
2820
+ )
2821
+ )
2822
+ .to_series
2823
+ end
2306
2824
 
2307
- # def rolling_skew
2308
- # end
2825
+ # Compute a rolling quantile.
2826
+ #
2827
+ # @param quantile [Float]
2828
+ # Quantile between 0.0 and 1.0.
2829
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
2830
+ # Interpolation method.
2831
+ # @param window_size [Integer]
2832
+ # The length of the window.
2833
+ # @param weights [Array]
2834
+ # An optional slice with the same length as the window that will be multiplied
2835
+ # elementwise with the values in the window.
2836
+ # @param min_periods [Integer]
2837
+ # The number of values in the window that should be non-null before computing
2838
+ # a result. If nil, it will be set equal to window size.
2839
+ # @param center [Boolean]
2840
+ # Set the labels at the center of the window
2841
+ #
2842
+ # @return [Series]
2843
+ #
2844
+ # @example
2845
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
2846
+ # s.rolling_quantile(0.33, window_size: 3)
2847
+ # # =>
2848
+ # # shape: (6,)
2849
+ # # Series: 'a' [f64]
2850
+ # # [
2851
+ # # null
2852
+ # # null
2853
+ # # 1.0
2854
+ # # 2.0
2855
+ # # 3.0
2856
+ # # 4.0
2857
+ # # ]
2858
+ #
2859
+ # @example
2860
+ # s.rolling_quantile(0.33, interpolation: "linear", window_size: 3)
2861
+ # # =>
2862
+ # # shape: (6,)
2863
+ # # Series: 'a' [f64]
2864
+ # # [
2865
+ # # null
2866
+ # # null
2867
+ # # 1.66
2868
+ # # 2.66
2869
+ # # 3.66
2870
+ # # 5.32
2871
+ # # ]
2872
+ def rolling_quantile(
2873
+ quantile,
2874
+ interpolation: "nearest",
2875
+ window_size: 2,
2876
+ weights: nil,
2877
+ min_periods: nil,
2878
+ center: false
2879
+ )
2880
+ if min_periods.nil?
2881
+ min_periods = window_size
2882
+ end
2309
2883
 
2310
- # def sample
2311
- # end
2884
+ to_frame
2885
+ .select(
2886
+ Polars.col(name).rolling_quantile(
2887
+ quantile,
2888
+ interpolation: interpolation,
2889
+ window_size: window_size,
2890
+ weights: weights,
2891
+ min_periods: min_periods,
2892
+ center: center
2893
+ )
2894
+ )
2895
+ .to_series
2896
+ end
2897
+
2898
+ # Compute a rolling skew.
2899
+ #
2900
+ # @param window_size [Integer]
2901
+ # Integer size of the rolling window.
2902
+ # @param bias [Boolean]
2903
+ # If false, the calculations are corrected for statistical bias.
2904
+ #
2905
+ # @return [Series]
2906
+ #
2907
+ # @example
2908
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
2909
+ # s.rolling_skew(3)
2910
+ # # =>
2911
+ # # shape: (6,)
2912
+ # # Series: 'a' [f64]
2913
+ # # [
2914
+ # # null
2915
+ # # null
2916
+ # # 0.0
2917
+ # # 0.0
2918
+ # # 0.381802
2919
+ # # 0.0
2920
+ # # ]
2921
+ def rolling_skew(window_size, bias: true)
2922
+ super
2923
+ end
2924
+
2925
+ # Sample from this Series.
2926
+ #
2927
+ # @param n [Integer]
2928
+ # Number of items to return. Cannot be used with `frac`. Defaults to 1 if
2929
+ # `frac` is nil.
2930
+ # @param frac [Float]
2931
+ # Fraction of items to return. Cannot be used with `n`.
2932
+ # @param with_replacement [Boolean]
2933
+ # Allow values to be sampled more than once.
2934
+ # @param shuffle [Boolean]
2935
+ # Shuffle the order of sampled data points.
2936
+ # @param seed [Integer]
2937
+ # Seed for the random number generator. If set to nil (default), a random
2938
+ # seed is used.
2939
+ #
2940
+ # @return [Series]
2941
+ #
2942
+ # @example
2943
+ # s = Polars::Series.new("a", [1, 2, 3, 4, 5])
2944
+ # s.sample(n: 2, seed: 0)
2945
+ # # =>
2946
+ # # shape: (2,)
2947
+ # # Series: 'a' [i64]
2948
+ # # [
2949
+ # # 1
2950
+ # # 5
2951
+ # # ]
2952
+ def sample(
2953
+ n: nil,
2954
+ frac: nil,
2955
+ with_replacement: false,
2956
+ shuffle: false,
2957
+ seed: nil
2958
+ )
2959
+ if !n.nil? && !frac.nil?
2960
+ raise ArgumentError, "cannot specify both `n` and `frac`"
2961
+ end
2962
+
2963
+ if n.nil? && !frac.nil?
2964
+ return Utils.wrap_s(_s.sample_frac(frac, with_replacement, shuffle, seed))
2965
+ end
2966
+
2967
+ if n.nil?
2968
+ n = 1
2969
+ end
2970
+ Utils.wrap_s(_s.sample_n(n, with_replacement, shuffle, seed))
2971
+ end
2312
2972
 
2313
2973
  # Get a boolean mask of the local maximum peaks.
2314
2974
  #
@@ -2381,8 +3041,35 @@ module Polars
2381
3041
  end
2382
3042
  end
2383
3043
 
2384
- # def _hash
2385
- # end
3044
+ # Hash the Series.
3045
+ #
3046
+ # The hash value is of type `:u64`.
3047
+ #
3048
+ # @param seed [Integer]
3049
+ # Random seed parameter. Defaults to 0.
3050
+ # @param seed_1 [Integer]
3051
+ # Random seed parameter. Defaults to `seed` if not set.
3052
+ # @param seed_2 [Integer]
3053
+ # Random seed parameter. Defaults to `seed` if not set.
3054
+ # @param seed_3 [Integer]
3055
+ # Random seed parameter. Defaults to `seed` if not set.
3056
+ #
3057
+ # @return [Series]
3058
+ #
3059
+ # @example
3060
+ # s = Polars::Series.new("a", [1, 2, 3])
3061
+ # s._hash(42)
3062
+ # # =>
3063
+ # # shape: (3,)
3064
+ # # Series: 'a' [u64]
3065
+ # # [
3066
+ # # 2374023516666777365
3067
+ # # 10386026231460783898
3068
+ # # 17796317186427479491
3069
+ # # ]
3070
+ def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
3071
+ super
3072
+ end
2386
3073
 
2387
3074
  # Reinterpret the underlying bits as a signed/unsigned integer.
2388
3075
  #
@@ -2425,8 +3112,61 @@ module Polars
2425
3112
  super
2426
3113
  end
2427
3114
 
2428
- # def rank
2429
- # end
3115
+ # Assign ranks to data, dealing with ties appropriately.
3116
+ #
3117
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
3118
+ # The method used to assign ranks to tied elements.
3119
+ # The following methods are available (default is 'average'):
3120
+ #
3121
+ # - 'average' : The average of the ranks that would have been assigned to
3122
+ # all the tied values is assigned to each value.
3123
+ # - 'min' : The minimum of the ranks that would have been assigned to all
3124
+ # the tied values is assigned to each value. (This is also referred to
3125
+ # as "competition" ranking.)
3126
+ # - 'max' : The maximum of the ranks that would have been assigned to all
3127
+ # the tied values is assigned to each value.
3128
+ # - 'dense' : Like 'min', but the rank of the next highest element is
3129
+ # assigned the rank immediately after those assigned to the tied
3130
+ # elements.
3131
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
3132
+ # the order that the values occur in the Series.
3133
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
3134
+ # on the order that the values occur in the Series.
3135
+ # @param reverse [Boolean]
3136
+ # Reverse the operation.
3137
+ #
3138
+ # @return [Series]
3139
+ #
3140
+ # @example The 'average' method:
3141
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3142
+ # s.rank
3143
+ # # =>
3144
+ # # shape: (5,)
3145
+ # # Series: 'a' [f32]
3146
+ # # [
3147
+ # # 3.0
3148
+ # # 4.5
3149
+ # # 1.5
3150
+ # # 1.5
3151
+ # # 4.5
3152
+ # # ]
3153
+ #
3154
+ # @example The 'ordinal' method:
3155
+ # s = Polars::Series.new("a", [3, 6, 1, 1, 6])
3156
+ # s.rank(method: "ordinal")
3157
+ # # =>
3158
+ # # shape: (5,)
3159
+ # # Series: 'a' [u32]
3160
+ # # [
3161
+ # # 3
3162
+ # # 4
3163
+ # # 1
3164
+ # # 2
3165
+ # # 5
3166
+ # # ]
3167
+ def rank(method: "average", reverse: false)
3168
+ super
3169
+ end
2430
3170
 
2431
3171
  # Calculate the n-th discrete difference.
2432
3172
  #
@@ -2440,8 +3180,56 @@ module Polars
2440
3180
  super
2441
3181
  end
2442
3182
 
2443
- # def pct_change
2444
- # end
3183
+ # Computes percentage change between values.
3184
+ #
3185
+ # Percentage change (as fraction) between current element and most-recent
3186
+ # non-null element at least `n` period(s) before the current element.
3187
+ #
3188
+ # Computes the change from the previous row by default.
3189
+ #
3190
+ # @param n [Integer]
3191
+ # periods to shift for forming percent change.
3192
+ #
3193
+ # @return [Series]
3194
+ #
3195
+ # @example
3196
+ # Polars::Series.new(0..9).pct_change
3197
+ # # =>
3198
+ # # shape: (10,)
3199
+ # # Series: '' [f64]
3200
+ # # [
3201
+ # # null
3202
+ # # inf
3203
+ # # 1.0
3204
+ # # 0.5
3205
+ # # 0.333333
3206
+ # # 0.25
3207
+ # # 0.2
3208
+ # # 0.166667
3209
+ # # 0.142857
3210
+ # # 0.125
3211
+ # # ]
3212
+ #
3213
+ # @example
3214
+ # Polars::Series.new([1, 2, 4, 8, 16, 32, 64, 128, 256, 512]).pct_change(n: 2)
3215
+ # # =>
3216
+ # # shape: (10,)
3217
+ # # Series: '' [f64]
3218
+ # # [
3219
+ # # null
3220
+ # # null
3221
+ # # 3.0
3222
+ # # 3.0
3223
+ # # 3.0
3224
+ # # 3.0
3225
+ # # 3.0
3226
+ # # 3.0
3227
+ # # 3.0
3228
+ # # 3.0
3229
+ # # ]
3230
+ def pct_change(n: 1)
3231
+ super
3232
+ end
2445
3233
 
2446
3234
  # Compute the sample skewness of a data set.
2447
3235
  #
@@ -2571,14 +3359,49 @@ module Polars
2571
3359
  super
2572
3360
  end
2573
3361
 
2574
- # def ewm_mean
2575
- # end
3362
+ # Exponentially-weighted moving average.
3363
+ #
3364
+ # @return [Series]
3365
+ def ewm_mean(
3366
+ com: nil,
3367
+ span: nil,
3368
+ half_life: nil,
3369
+ alpha: nil,
3370
+ adjust: true,
3371
+ min_periods: 1
3372
+ )
3373
+ super
3374
+ end
2576
3375
 
2577
- # def ewm_std
2578
- # end
3376
+ # Exponentially-weighted moving standard deviation.
3377
+ #
3378
+ # @return [Series]
3379
+ def ewm_std(
3380
+ com: nil,
3381
+ span: nil,
3382
+ half_life: nil,
3383
+ alpha: nil,
3384
+ adjust: true,
3385
+ bias: false,
3386
+ min_periods: 1
3387
+ )
3388
+ super
3389
+ end
2579
3390
 
2580
- # def ewm_var
2581
- # end
3391
+ # Exponentially-weighted moving variance.
3392
+ #
3393
+ # @return [Series]
3394
+ def ewm_var(
3395
+ com: nil,
3396
+ span: nil,
3397
+ half_life: nil,
3398
+ alpha: nil,
3399
+ adjust: true,
3400
+ bias: false,
3401
+ min_periods: 1
3402
+ )
3403
+ super
3404
+ end
2582
3405
 
2583
3406
  # Extend the Series with given number of values.
2584
3407
  #
@@ -2645,20 +3468,40 @@ module Polars
2645
3468
  super
2646
3469
  end
2647
3470
 
2648
- # def arr
2649
- # end
3471
+ # Create an object namespace of all list related methods.
3472
+ #
3473
+ # @return [ListNameSpace]
3474
+ def arr
3475
+ ListNameSpace.new(self)
3476
+ end
2650
3477
 
2651
- # def cat
2652
- # end
3478
+ # Create an object namespace of all categorical related methods.
3479
+ #
3480
+ # @return [CatNameSpace]
3481
+ def cat
3482
+ CatNameSpace.new(self)
3483
+ end
2653
3484
 
2654
- # def dt
2655
- # end
3485
+ # Create an object namespace of all datetime related methods.
3486
+ #
3487
+ # @return [DateTimeNameSpace]
3488
+ def dt
3489
+ DateTimeNameSpace.new(self)
3490
+ end
2656
3491
 
2657
- # def str
2658
- # end
3492
+ # Create an object namespace of all string related methods.
3493
+ #
3494
+ # @return [StringNameSpace]
3495
+ def str
3496
+ StringNameSpace.new(self)
3497
+ end
2659
3498
 
2660
- # def struct
2661
- # end
3499
+ # Create an object namespace of all struct related methods.
3500
+ #
3501
+ # @return [StructNameSpace]
3502
+ def struct
3503
+ StructNameSpace.new(self)
3504
+ end
2662
3505
 
2663
3506
  private
2664
3507
 
@@ -2696,7 +3539,14 @@ module Polars
2696
3539
  return Utils.wrap_s(_s.send(op, other._s))
2697
3540
  end
2698
3541
 
2699
- raise Todo
3542
+ if other.is_a?(Date) || other.is_a?(DateTime) || other.is_a?(Time) || other.is_a?(String)
3543
+ raise Todo
3544
+ end
3545
+ if other.is_a?(Float) && !is_float
3546
+ raise Todo
3547
+ end
3548
+
3549
+ Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
2700
3550
  end
2701
3551
 
2702
3552
  def series_to_rbseries(name, values)
@@ -2751,6 +3601,10 @@ module Polars
2751
3601
 
2752
3602
  if ruby_dtype == Date
2753
3603
  RbSeries.new_opt_date(name, values, strict)
3604
+ elsif ruby_dtype == Time
3605
+ RbSeries.new_opt_datetime(name, values, strict)
3606
+ elsif ruby_dtype == DateTime
3607
+ RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
2754
3608
  else
2755
3609
  raise Todo
2756
3610
  end
@@ -2764,7 +3618,26 @@ module Polars
2764
3618
  raise Todo
2765
3619
  end
2766
3620
 
2767
- raise Todo
3621
+ if value.is_a?(Array)
3622
+ count = 0
3623
+ equal_to_inner = true
3624
+ values.each do |lst|
3625
+ lst.each do |vl|
3626
+ equal_to_inner = vl.class == nested_dtype
3627
+ if !equal_to_inner || count > 50
3628
+ break
3629
+ end
3630
+ count += 1
3631
+ end
3632
+ end
3633
+ if equal_to_inner
3634
+ dtype = Utils.rb_type_to_dtype(nested_dtype)
3635
+ # TODO rescue and fallback to new_object
3636
+ return RbSeries.new_list(name, values, dtype)
3637
+ end
3638
+ end
3639
+
3640
+ RbSeries.new_object(name, values, strict)
2768
3641
  else
2769
3642
  constructor = rb_type_to_constructor(value.class)
2770
3643
  constructor.call(name, values, strict)
@@ -2804,8 +3677,7 @@ module Polars
2804
3677
  def rb_type_to_constructor(dtype)
2805
3678
  RB_TYPE_TO_CONSTRUCTOR.fetch(dtype)
2806
3679
  rescue KeyError
2807
- # RbSeries.method(:new_object)
2808
- raise ArgumentError, "Cannot determine type"
3680
+ RbSeries.method(:new_object)
2809
3681
  end
2810
3682
 
2811
3683
  def _get_first_non_none(values)