polars-df 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +284 -216
- data/ext/polars/Cargo.toml +7 -4
- data/ext/polars/src/batched_csv.rs +2 -3
- data/ext/polars/src/conversion.rs +18 -17
- data/ext/polars/src/dataframe.rs +27 -63
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/general.rs +63 -4
- data/ext/polars/src/expr/rolling.rs +15 -10
- data/ext/polars/src/expr/string.rs +9 -9
- data/ext/polars/src/functions/range.rs +5 -10
- data/ext/polars/src/lazyframe.rs +28 -19
- data/ext/polars/src/lib.rs +20 -20
- data/ext/polars/src/map/dataframe.rs +1 -1
- data/ext/polars/src/map/mod.rs +2 -2
- data/ext/polars/src/map/series.rs +6 -6
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +1 -1
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/{series.rs → series/mod.rs} +21 -18
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/data_frame.rb +69 -65
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/expr.rb +223 -18
- data/lib/polars/group_by.rb +1 -1
- data/lib/polars/io.rb +4 -4
- data/lib/polars/lazy_frame.rb +23 -23
- data/lib/polars/lazy_functions.rb +4 -20
- data/lib/polars/series.rb +289 -30
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +13 -13
- data/lib/polars/version.rb +1 -1
- metadata +7 -6
data/lib/polars/expr.rb
CHANGED
@@ -366,7 +366,7 @@ module Polars
|
|
366
366
|
# # │ 3 ┆ 1.5 │
|
367
367
|
# # └─────┴──────┘
|
368
368
|
def exclude(columns)
|
369
|
-
if columns.is_a?(String)
|
369
|
+
if columns.is_a?(::String)
|
370
370
|
columns = [columns]
|
371
371
|
return wrap_expr(_rbexpr.exclude(columns))
|
372
372
|
elsif !columns.is_a?(::Array)
|
@@ -374,11 +374,11 @@ module Polars
|
|
374
374
|
return wrap_expr(_rbexpr.exclude_dtype(columns))
|
375
375
|
end
|
376
376
|
|
377
|
-
if !columns.all? { |a| a.is_a?(String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
|
377
|
+
if !columns.all? { |a| a.is_a?(::String) } || !columns.all? { |a| Utils.is_polars_dtype(a) }
|
378
378
|
raise ArgumentError, "input should be all string or all DataType"
|
379
379
|
end
|
380
380
|
|
381
|
-
if columns[0].is_a?(String)
|
381
|
+
if columns[0].is_a?(::String)
|
382
382
|
wrap_expr(_rbexpr.exclude(columns))
|
383
383
|
else
|
384
384
|
wrap_expr(_rbexpr.exclude_dtype(columns))
|
@@ -721,13 +721,13 @@ module Polars
|
|
721
721
|
# # │ 3 ┆ 3 │
|
722
722
|
# # └─────┴─────┘
|
723
723
|
def count
|
724
|
-
wrap_expr(_rbexpr.count)
|
724
|
+
warn "`Expr#count` will exclude null values in 0.9.0. Use `Expr#length` instead."
|
725
|
+
# wrap_expr(_rbexpr.count)
|
726
|
+
wrap_expr(_rbexpr.len)
|
725
727
|
end
|
726
728
|
|
727
729
|
# Count the number of values in this expression.
|
728
730
|
#
|
729
|
-
# Alias for {#count}.
|
730
|
-
#
|
731
731
|
# @return [Expr]
|
732
732
|
#
|
733
733
|
# @example
|
@@ -743,8 +743,9 @@ module Polars
|
|
743
743
|
# # │ 3 ┆ 3 │
|
744
744
|
# # └─────┴─────┘
|
745
745
|
def len
|
746
|
-
count
|
746
|
+
wrap_expr(_rbexpr.len)
|
747
747
|
end
|
748
|
+
alias_method :length, :len
|
748
749
|
|
749
750
|
# Get a slice of this expression.
|
750
751
|
#
|
@@ -2423,7 +2424,7 @@ module Polars
|
|
2423
2424
|
# # │ --- │
|
2424
2425
|
# # │ f64 │
|
2425
2426
|
# # ╞═════╡
|
2426
|
-
# # │
|
2427
|
+
# # │ 2.0 │
|
2427
2428
|
# # └─────┘
|
2428
2429
|
#
|
2429
2430
|
# @example
|
@@ -2478,6 +2479,206 @@ module Polars
|
|
2478
2479
|
wrap_expr(_rbexpr.quantile(quantile._rbexpr, interpolation))
|
2479
2480
|
end
|
2480
2481
|
|
2482
|
+
# Bin continuous values into discrete categories.
|
2483
|
+
#
|
2484
|
+
# @param breaks [Array]
|
2485
|
+
# List of unique cut points.
|
2486
|
+
# @param labels [Array]
|
2487
|
+
# Names of the categories. The number of labels must be equal to the number
|
2488
|
+
# of cut points plus one.
|
2489
|
+
# @param left_closed [Boolean]
|
2490
|
+
# Set the intervals to be left-closed instead of right-closed.
|
2491
|
+
# @param include_breaks [Boolean]
|
2492
|
+
# Include a column with the right endpoint of the bin each observation falls
|
2493
|
+
# in. This will change the data type of the output from a
|
2494
|
+
# `Categorical` to a `Struct`.
|
2495
|
+
#
|
2496
|
+
# @return [Expr]
|
2497
|
+
#
|
2498
|
+
# @example Divide a column into three categories.
|
2499
|
+
# df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
|
2500
|
+
# df.with_columns(
|
2501
|
+
# Polars.col("foo").cut([-1, 1], labels: ["a", "b", "c"]).alias("cut")
|
2502
|
+
# )
|
2503
|
+
# # =>
|
2504
|
+
# # shape: (5, 2)
|
2505
|
+
# # ┌─────┬─────┐
|
2506
|
+
# # │ foo ┆ cut │
|
2507
|
+
# # │ --- ┆ --- │
|
2508
|
+
# # │ i64 ┆ cat │
|
2509
|
+
# # ╞═════╪═════╡
|
2510
|
+
# # │ -2 ┆ a │
|
2511
|
+
# # │ -1 ┆ a │
|
2512
|
+
# # │ 0 ┆ b │
|
2513
|
+
# # │ 1 ┆ b │
|
2514
|
+
# # │ 2 ┆ c │
|
2515
|
+
# # └─────┴─────┘
|
2516
|
+
#
|
2517
|
+
# @example Add both the category and the breakpoint.
|
2518
|
+
# df.with_columns(
|
2519
|
+
# Polars.col("foo").cut([-1, 1], include_breaks: true).alias("cut")
|
2520
|
+
# ).unnest("cut")
|
2521
|
+
# # =>
|
2522
|
+
# # shape: (5, 3)
|
2523
|
+
# # ┌─────┬──────┬────────────┐
|
2524
|
+
# # │ foo ┆ brk ┆ foo_bin │
|
2525
|
+
# # │ --- ┆ --- ┆ --- │
|
2526
|
+
# # │ i64 ┆ f64 ┆ cat │
|
2527
|
+
# # ╞═════╪══════╪════════════╡
|
2528
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
2529
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
2530
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
2531
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
2532
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
2533
|
+
# # └─────┴──────┴────────────┘
|
2534
|
+
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
|
2535
|
+
wrap_expr(_rbexpr.cut(breaks, labels, left_closed, include_breaks))
|
2536
|
+
end
|
2537
|
+
|
2538
|
+
# Bin continuous values into discrete categories based on their quantiles.
|
2539
|
+
#
|
2540
|
+
# @param quantiles [Array]
|
2541
|
+
# Either a list of quantile probabilities between 0 and 1 or a positive
|
2542
|
+
# integer determining the number of bins with uniform probability.
|
2543
|
+
# @param labels [Array]
|
2544
|
+
# Names of the categories. The number of labels must be equal to the number
|
2545
|
+
# of categories.
|
2546
|
+
# @param left_closed [Boolean]
|
2547
|
+
# Set the intervals to be left-closed instead of right-closed.
|
2548
|
+
# @param allow_duplicates [Boolean]
|
2549
|
+
# If set to `true`, duplicates in the resulting quantiles are dropped,
|
2550
|
+
# rather than raising a `DuplicateError`. This can happen even with unique
|
2551
|
+
# probabilities, depending on the data.
|
2552
|
+
# @param include_breaks [Boolean]
|
2553
|
+
# Include a column with the right endpoint of the bin each observation falls
|
2554
|
+
# in. This will change the data type of the output from a
|
2555
|
+
# `Categorical` to a `Struct`.
|
2556
|
+
#
|
2557
|
+
# @return [Expr]
|
2558
|
+
#
|
2559
|
+
# @example Divide a column into three categories according to pre-defined quantile probabilities.
|
2560
|
+
# df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
|
2561
|
+
# df.with_columns(
|
2562
|
+
# Polars.col("foo").qcut([0.25, 0.75], labels: ["a", "b", "c"]).alias("qcut")
|
2563
|
+
# )
|
2564
|
+
# # =>
|
2565
|
+
# # shape: (5, 2)
|
2566
|
+
# # ┌─────┬──────┐
|
2567
|
+
# # │ foo ┆ qcut │
|
2568
|
+
# # │ --- ┆ --- │
|
2569
|
+
# # │ i64 ┆ cat │
|
2570
|
+
# # ╞═════╪══════╡
|
2571
|
+
# # │ -2 ┆ a │
|
2572
|
+
# # │ -1 ┆ a │
|
2573
|
+
# # │ 0 ┆ b │
|
2574
|
+
# # │ 1 ┆ b │
|
2575
|
+
# # │ 2 ┆ c │
|
2576
|
+
# # └─────┴──────┘
|
2577
|
+
#
|
2578
|
+
# @example Divide a column into two categories using uniform quantile probabilities.
|
2579
|
+
# df.with_columns(
|
2580
|
+
# Polars.col("foo")
|
2581
|
+
# .qcut(2, labels: ["low", "high"], left_closed: true)
|
2582
|
+
# .alias("qcut")
|
2583
|
+
# )
|
2584
|
+
# # =>
|
2585
|
+
# # shape: (5, 2)
|
2586
|
+
# # ┌─────┬──────┐
|
2587
|
+
# # │ foo ┆ qcut │
|
2588
|
+
# # │ --- ┆ --- │
|
2589
|
+
# # │ i64 ┆ cat │
|
2590
|
+
# # ╞═════╪══════╡
|
2591
|
+
# # │ -2 ┆ low │
|
2592
|
+
# # │ -1 ┆ low │
|
2593
|
+
# # │ 0 ┆ high │
|
2594
|
+
# # │ 1 ┆ high │
|
2595
|
+
# # │ 2 ┆ high │
|
2596
|
+
# # └─────┴──────┘
|
2597
|
+
#
|
2598
|
+
# @example Add both the category and the breakpoint.
|
2599
|
+
# df.with_columns(
|
2600
|
+
# Polars.col("foo").qcut([0.25, 0.75], include_breaks: true).alias("qcut")
|
2601
|
+
# ).unnest("qcut")
|
2602
|
+
# # =>
|
2603
|
+
# # shape: (5, 3)
|
2604
|
+
# # ┌─────┬──────┬────────────┐
|
2605
|
+
# # │ foo ┆ brk ┆ foo_bin │
|
2606
|
+
# # │ --- ┆ --- ┆ --- │
|
2607
|
+
# # │ i64 ┆ f64 ┆ cat │
|
2608
|
+
# # ╞═════╪══════╪════════════╡
|
2609
|
+
# # │ -2 ┆ -1.0 ┆ (-inf, -1] │
|
2610
|
+
# # │ -1 ┆ -1.0 ┆ (-inf, -1] │
|
2611
|
+
# # │ 0 ┆ 1.0 ┆ (-1, 1] │
|
2612
|
+
# # │ 1 ┆ 1.0 ┆ (-1, 1] │
|
2613
|
+
# # │ 2 ┆ inf ┆ (1, inf] │
|
2614
|
+
# # └─────┴──────┴────────────┘
|
2615
|
+
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
|
2616
|
+
if quantiles.is_a?(Integer)
|
2617
|
+
rbexpr = _rbexpr.qcut_uniform(
|
2618
|
+
quantiles, labels, left_closed, allow_duplicates, include_breaks
|
2619
|
+
)
|
2620
|
+
else
|
2621
|
+
rbexpr = _rbexpr.qcut(
|
2622
|
+
quantiles, labels, left_closed, allow_duplicates, include_breaks
|
2623
|
+
)
|
2624
|
+
end
|
2625
|
+
|
2626
|
+
wrap_expr(rbexpr)
|
2627
|
+
end
|
2628
|
+
|
2629
|
+
# Get the lengths of runs of identical values.
|
2630
|
+
#
|
2631
|
+
# @return [Expr]
|
2632
|
+
#
|
2633
|
+
# @example
|
2634
|
+
# df = Polars::DataFrame.new(Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3]))
|
2635
|
+
# df.select(Polars.col("s").rle).unnest("s")
|
2636
|
+
# # =>
|
2637
|
+
# # shape: (6, 2)
|
2638
|
+
# # ┌─────────┬────────┐
|
2639
|
+
# # │ lengths ┆ values │
|
2640
|
+
# # │ --- ┆ --- │
|
2641
|
+
# # │ i32 ┆ i64 │
|
2642
|
+
# # ╞═════════╪════════╡
|
2643
|
+
# # │ 2 ┆ 1 │
|
2644
|
+
# # │ 1 ┆ 2 │
|
2645
|
+
# # │ 1 ┆ 1 │
|
2646
|
+
# # │ 1 ┆ null │
|
2647
|
+
# # │ 1 ┆ 1 │
|
2648
|
+
# # │ 2 ┆ 3 │
|
2649
|
+
# # └─────────┴────────┘
|
2650
|
+
def rle
|
2651
|
+
wrap_expr(_rbexpr.rle)
|
2652
|
+
end
|
2653
|
+
|
2654
|
+
# Map values to run IDs.
|
2655
|
+
#
|
2656
|
+
# Similar to RLE, but it maps each value to an ID corresponding to the run into
|
2657
|
+
# which it falls. This is especially useful when you want to define groups by
|
2658
|
+
# runs of identical values rather than the values themselves.
|
2659
|
+
#
|
2660
|
+
# @return [Expr]
|
2661
|
+
#
|
2662
|
+
# @example
|
2663
|
+
# df = Polars::DataFrame.new({"a" => [1, 2, 1, 1, 1], "b" => ["x", "x", nil, "y", "y"]})
|
2664
|
+
# df.with_columns([Polars.col("a").rle_id.alias("a_r"), Polars.struct(["a", "b"]).rle_id.alias("ab_r")])
|
2665
|
+
# # =>
|
2666
|
+
# # shape: (5, 4)
|
2667
|
+
# # ┌─────┬──────┬─────┬──────┐
|
2668
|
+
# # │ a ┆ b ┆ a_r ┆ ab_r │
|
2669
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
2670
|
+
# # │ i64 ┆ str ┆ u32 ┆ u32 │
|
2671
|
+
# # ╞═════╪══════╪═════╪══════╡
|
2672
|
+
# # │ 1 ┆ x ┆ 0 ┆ 0 │
|
2673
|
+
# # │ 2 ┆ x ┆ 1 ┆ 1 │
|
2674
|
+
# # │ 1 ┆ null ┆ 2 ┆ 2 │
|
2675
|
+
# # │ 1 ┆ y ┆ 2 ┆ 3 │
|
2676
|
+
# # │ 1 ┆ y ┆ 2 ┆ 3 │
|
2677
|
+
# # └─────┴──────┴─────┴──────┘
|
2678
|
+
def rle_id
|
2679
|
+
wrap_expr(_rbexpr.rle_id)
|
2680
|
+
end
|
2681
|
+
|
2481
2682
|
# Filter a single column.
|
2482
2683
|
#
|
2483
2684
|
# Mostly useful in an aggregation context. If you want to filter on a DataFrame
|
@@ -2751,8 +2952,8 @@ module Polars
|
|
2751
2952
|
# # │ 4 │
|
2752
2953
|
# # │ 7 │
|
2753
2954
|
# # └─────┘
|
2754
|
-
def gather_every(n)
|
2755
|
-
wrap_expr(_rbexpr.gather_every(n))
|
2955
|
+
def gather_every(n, offset = 0)
|
2956
|
+
wrap_expr(_rbexpr.gather_every(n, offset))
|
2756
2957
|
end
|
2757
2958
|
alias_method :take_every, :gather_every
|
2758
2959
|
|
@@ -3571,14 +3772,15 @@ module Polars
|
|
3571
3772
|
center: false,
|
3572
3773
|
by: nil,
|
3573
3774
|
closed: "left",
|
3574
|
-
ddof: 1
|
3775
|
+
ddof: 1,
|
3776
|
+
warn_if_unsorted: true
|
3575
3777
|
)
|
3576
3778
|
window_size, min_periods = _prepare_rolling_window_args(
|
3577
3779
|
window_size, min_periods
|
3578
3780
|
)
|
3579
3781
|
wrap_expr(
|
3580
3782
|
_rbexpr.rolling_std(
|
3581
|
-
window_size, weights, min_periods, center, by, closed, ddof
|
3783
|
+
window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
|
3582
3784
|
)
|
3583
3785
|
)
|
3584
3786
|
end
|
@@ -3661,14 +3863,15 @@ module Polars
|
|
3661
3863
|
center: false,
|
3662
3864
|
by: nil,
|
3663
3865
|
closed: "left",
|
3664
|
-
ddof: 1
|
3866
|
+
ddof: 1,
|
3867
|
+
warn_if_unsorted: true
|
3665
3868
|
)
|
3666
3869
|
window_size, min_periods = _prepare_rolling_window_args(
|
3667
3870
|
window_size, min_periods
|
3668
3871
|
)
|
3669
3872
|
wrap_expr(
|
3670
3873
|
_rbexpr.rolling_var(
|
3671
|
-
window_size, weights, min_periods, center, by, closed, ddof
|
3874
|
+
window_size, weights, min_periods, center, by, closed, ddof, warn_if_unsorted
|
3672
3875
|
)
|
3673
3876
|
)
|
3674
3877
|
end
|
@@ -3746,14 +3949,15 @@ module Polars
|
|
3746
3949
|
min_periods: nil,
|
3747
3950
|
center: false,
|
3748
3951
|
by: nil,
|
3749
|
-
closed: "left"
|
3952
|
+
closed: "left",
|
3953
|
+
warn_if_unsorted: true
|
3750
3954
|
)
|
3751
3955
|
window_size, min_periods = _prepare_rolling_window_args(
|
3752
3956
|
window_size, min_periods
|
3753
3957
|
)
|
3754
3958
|
wrap_expr(
|
3755
3959
|
_rbexpr.rolling_median(
|
3756
|
-
window_size, weights, min_periods, center, by, closed
|
3960
|
+
window_size, weights, min_periods, center, by, closed, warn_if_unsorted
|
3757
3961
|
)
|
3758
3962
|
)
|
3759
3963
|
end
|
@@ -3837,14 +4041,15 @@ module Polars
|
|
3837
4041
|
min_periods: nil,
|
3838
4042
|
center: false,
|
3839
4043
|
by: nil,
|
3840
|
-
closed: "left"
|
4044
|
+
closed: "left",
|
4045
|
+
warn_if_unsorted: true
|
3841
4046
|
)
|
3842
4047
|
window_size, min_periods = _prepare_rolling_window_args(
|
3843
4048
|
window_size, min_periods
|
3844
4049
|
)
|
3845
4050
|
wrap_expr(
|
3846
4051
|
_rbexpr.rolling_quantile(
|
3847
|
-
quantile, interpolation, window_size, weights, min_periods, center, by, closed
|
4052
|
+
quantile, interpolation, window_size, weights, min_periods, center, by, closed, warn_if_unsorted
|
3848
4053
|
)
|
3849
4054
|
)
|
3850
4055
|
end
|
data/lib/polars/group_by.rb
CHANGED
@@ -47,7 +47,7 @@ module Polars
|
|
47
47
|
|
48
48
|
# When grouping by a single column, group name is a single value
|
49
49
|
# When grouping by multiple columns, group name is a tuple of values
|
50
|
-
if @by.is_a?(String) || @by.is_a?(Expr)
|
50
|
+
if @by.is_a?(::String) || @by.is_a?(Expr)
|
51
51
|
_group_names = group_names.to_series.each
|
52
52
|
else
|
53
53
|
_group_names = group_names.iter_rows
|
data/lib/polars/io.rb
CHANGED
@@ -616,7 +616,7 @@ module Polars
|
|
616
616
|
query
|
617
617
|
elsif query.is_a?(ActiveRecord::Relation)
|
618
618
|
query.connection.select_all(query.to_sql)
|
619
|
-
elsif query.is_a?(String)
|
619
|
+
elsif query.is_a?(::String)
|
620
620
|
ActiveRecord::Base.connection.select_all(query)
|
621
621
|
else
|
622
622
|
raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
|
@@ -652,7 +652,7 @@ module Polars
|
|
652
652
|
when :integer
|
653
653
|
Int64
|
654
654
|
when :string, :text
|
655
|
-
Utf8
|
655
|
+
String
|
656
656
|
when :time
|
657
657
|
Time
|
658
658
|
end
|
@@ -856,7 +856,7 @@ module Polars
|
|
856
856
|
private
|
857
857
|
|
858
858
|
def _prepare_file_arg(file)
|
859
|
-
if file.is_a?(String) && file =~ /\Ahttps?:\/\//
|
859
|
+
if file.is_a?(::String) && file =~ /\Ahttps?:\/\//
|
860
860
|
raise ArgumentError, "use URI(...) for remote files"
|
861
861
|
end
|
862
862
|
|
@@ -870,7 +870,7 @@ module Polars
|
|
870
870
|
end
|
871
871
|
|
872
872
|
def _check_arg_is_1byte(arg_name, arg, can_be_empty = false)
|
873
|
-
if arg.is_a?(String)
|
873
|
+
if arg.is_a?(::String)
|
874
874
|
arg_byte_length = arg.bytesize
|
875
875
|
if can_be_empty
|
876
876
|
if arg_byte_length > 1
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -218,7 +218,7 @@ module Polars
|
|
218
218
|
# }
|
219
219
|
# ).lazy
|
220
220
|
# lf.dtypes
|
221
|
-
# # => [Polars::Int64, Polars::Float64, Polars::Utf8]
|
221
|
+
# # => [Polars::Int64, Polars::Float64, Polars::String]
|
222
222
|
def dtypes
|
223
223
|
_ldf.dtypes
|
224
224
|
end
|
@@ -236,7 +236,7 @@ module Polars
|
|
236
236
|
# }
|
237
237
|
# ).lazy
|
238
238
|
# lf.schema
|
239
|
-
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::Utf8}
|
239
|
+
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
|
240
240
|
def schema
|
241
241
|
_ldf.schema
|
242
242
|
end
|
@@ -399,7 +399,7 @@ module Polars
|
|
399
399
|
# # │ 1 ┆ 6.0 ┆ a │
|
400
400
|
# # └─────┴─────┴─────┘
|
401
401
|
def sort(by, reverse: false, nulls_last: false, maintain_order: false)
|
402
|
-
if by.is_a?(String)
|
402
|
+
if by.is_a?(::String)
|
403
403
|
return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
|
404
404
|
end
|
405
405
|
if Utils.bool?(reverse)
|
@@ -1371,7 +1371,7 @@ module Polars
|
|
1371
1371
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
1372
1372
|
end
|
1373
1373
|
|
1374
|
-
if on.is_a?(String)
|
1374
|
+
if on.is_a?(::String)
|
1375
1375
|
left_on = on
|
1376
1376
|
right_on = on
|
1377
1377
|
end
|
@@ -1380,19 +1380,19 @@ module Polars
|
|
1380
1380
|
raise ArgumentError, "You should pass the column to join on as an argument."
|
1381
1381
|
end
|
1382
1382
|
|
1383
|
-
if by_left.is_a?(String) || by_left.is_a?(Expr)
|
1383
|
+
if by_left.is_a?(::String) || by_left.is_a?(Expr)
|
1384
1384
|
by_left_ = [by_left]
|
1385
1385
|
else
|
1386
1386
|
by_left_ = by_left
|
1387
1387
|
end
|
1388
1388
|
|
1389
|
-
if by_right.is_a?(String) || by_right.is_a?(Expr)
|
1389
|
+
if by_right.is_a?(::String) || by_right.is_a?(Expr)
|
1390
1390
|
by_right_ = [by_right]
|
1391
1391
|
else
|
1392
1392
|
by_right_ = by_right
|
1393
1393
|
end
|
1394
1394
|
|
1395
|
-
if by.is_a?(String)
|
1395
|
+
if by.is_a?(::String)
|
1396
1396
|
by_left_ = [by]
|
1397
1397
|
by_right_ = [by]
|
1398
1398
|
elsif by.is_a?(::Array)
|
@@ -1402,7 +1402,7 @@ module Polars
|
|
1402
1402
|
|
1403
1403
|
tolerance_str = nil
|
1404
1404
|
tolerance_num = nil
|
1405
|
-
if tolerance.is_a?(String)
|
1405
|
+
if tolerance.is_a?(::String)
|
1406
1406
|
tolerance_str = tolerance
|
1407
1407
|
else
|
1408
1408
|
tolerance_num = tolerance
|
@@ -1478,17 +1478,17 @@ module Polars
|
|
1478
1478
|
# @example
|
1479
1479
|
# df.join(other_df, on: "ham", how: "outer").collect
|
1480
1480
|
# # =>
|
1481
|
-
# # shape: (4, 4)
|
1482
|
-
# # ┌──────┬──────┬─────┬───────┐
|
1483
|
-
# # │ foo ┆ bar ┆ ham ┆ apple │
|
1484
|
-
# # │ --- ┆ --- ┆ --- ┆ --- │
|
1485
|
-
# # │ i64 ┆ f64 ┆ str ┆ str │
|
1486
|
-
# # ╞══════╪══════╪═════╪═══════╡
|
1487
|
-
# # │ 1 ┆ 6.0 ┆ a ┆ x │
|
1488
|
-
# # │ 2 ┆ 7.0 ┆ b ┆ y │
|
1489
|
-
# # │ null ┆ null ┆ d ┆ z │
|
1490
|
-
# # │ 3 ┆ 8.0 ┆ c ┆ null │
|
1491
|
-
# # └──────┴──────┴─────┴───────┘
|
1481
|
+
# # shape: (4, 5)
|
1482
|
+
# # ┌──────┬──────┬──────┬───────┬───────────┐
|
1483
|
+
# # │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │
|
1484
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1485
|
+
# # │ i64 ┆ f64 ┆ str ┆ str ┆ str │
|
1486
|
+
# # ╞══════╪══════╪══════╪═══════╪═══════════╡
|
1487
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x ┆ a │
|
1488
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y ┆ b │
|
1489
|
+
# # │ null ┆ null ┆ null ┆ z ┆ d │
|
1490
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null ┆ null │
|
1491
|
+
# # └──────┴──────┴──────┴───────┴───────────┘
|
1492
1492
|
#
|
1493
1493
|
# @example
|
1494
1494
|
# df.join(other_df, on: "ham", how: "left").collect
|
@@ -1722,7 +1722,7 @@ module Polars
|
|
1722
1722
|
#
|
1723
1723
|
# @return [LazyFrame]
|
1724
1724
|
def drop(columns)
|
1725
|
-
if columns.is_a?(String)
|
1725
|
+
if columns.is_a?(::String)
|
1726
1726
|
columns = [columns]
|
1727
1727
|
end
|
1728
1728
|
_from_rbldf(_ldf.drop_columns(columns))
|
@@ -2363,10 +2363,10 @@ module Polars
|
|
2363
2363
|
# # │ z ┆ c ┆ 6 │
|
2364
2364
|
# # └─────┴──────────┴───────┘
|
2365
2365
|
def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil, streamable: true)
|
2366
|
-
if value_vars.is_a?(String)
|
2366
|
+
if value_vars.is_a?(::String)
|
2367
2367
|
value_vars = [value_vars]
|
2368
2368
|
end
|
2369
|
-
if id_vars.is_a?(String)
|
2369
|
+
if id_vars.is_a?(::String)
|
2370
2370
|
id_vars = [id_vars]
|
2371
2371
|
end
|
2372
2372
|
if value_vars.nil?
|
@@ -2464,7 +2464,7 @@ module Polars
|
|
2464
2464
|
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
2465
2465
|
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
2466
2466
|
def unnest(names)
|
2467
|
-
if names.is_a?(String)
|
2467
|
+
if names.is_a?(::String)
|
2468
2468
|
names = [names]
|
2469
2469
|
end
|
2470
2470
|
_from_rbldf(_ldf.unnest(names))
|
data/lib/polars/lazy_functions.rb
CHANGED
@@ -107,44 +107,28 @@ module Polars
|
|
107
107
|
# Get the maximum value.
|
108
108
|
#
|
109
109
|
# @param column [Object]
|
110
|
-
# Column(s) to be used in aggregation.
|
111
|
-
# the input:
|
112
|
-
#
|
113
|
-
# - [String, Series] -> aggregate the maximum value of that column.
|
114
|
-
# - [Array<Expr>] -> aggregate the maximum value horizontally.
|
110
|
+
# Column(s) to be used in aggregation.
|
115
111
|
#
|
116
112
|
# @return [Expr, Object]
|
117
113
|
def max(column)
|
118
114
|
if column.is_a?(Series)
|
119
115
|
column.max
|
120
|
-
elsif Utils.strlike?(column)
|
121
|
-
col(column).max
|
122
116
|
else
|
123
|
-
exprs = Utils.selection_to_rbexpr_list(column)
|
124
|
-
# TODO
|
125
|
-
Utils.wrap_expr(_max_exprs(exprs))
|
117
|
+
col(column).max
|
126
118
|
end
|
127
119
|
end
|
128
120
|
|
129
121
|
# Get the minimum value.
|
130
122
|
#
|
131
123
|
# @param column [Object]
|
132
|
-
# Column(s) to be used in aggregation.
|
133
|
-
# the input:
|
134
|
-
#
|
135
|
-
# - [String, Series] -> aggregate the minimum value of that column.
|
136
|
-
# - [Array<Expr>] -> aggregate the minimum value horizontally.
|
124
|
+
# Column(s) to be used in aggregation.
|
137
125
|
#
|
138
126
|
# @return [Expr, Object]
|
139
127
|
def min(column)
|
140
128
|
if column.is_a?(Series)
|
141
129
|
column.min
|
142
|
-
elsif Utils.strlike?(column)
|
143
|
-
col(column).min
|
144
130
|
else
|
145
|
-
exprs = Utils.selection_to_rbexpr_list(column)
|
146
|
-
# TODO
|
147
|
-
Utils.wrap_expr(_min_exprs(exprs))
|
131
|
+
col(column).min
|
148
132
|
end
|
149
133
|
end
|
150
134
|
|