polars-df 0.17.1-x86_64-darwin → 0.19.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
Binary file
@@ -481,6 +481,8 @@ module Polars
481
481
  #
482
482
  # @param item [Object]
483
483
  # Item that will be checked for membership
484
+ # @param nulls_equal [Boolean]
485
+ # If true, treat null as a distinct value. Null values will not propagate.
484
486
  #
485
487
  # @return [Expr]
486
488
  #
@@ -501,9 +503,9 @@ module Polars
501
503
  # # │ ["x", "y"] ┆ false │
502
504
  # # │ ["a", "c"] ┆ true │
503
505
  # # └───────────────┴──────────┘
504
- def contains(item)
506
+ def contains(item, nulls_equal: true)
505
507
  item = Utils.parse_into_expression(item, str_as_lit: true)
506
- Utils.wrap_expr(_rbexpr.arr_contains(item))
508
+ Utils.wrap_expr(_rbexpr.arr_contains(item, nulls_equal))
507
509
  end
508
510
 
509
511
  # Count how often the value produced by `element` occurs.
data/lib/polars/expr.rb CHANGED
@@ -1176,8 +1176,8 @@ module Polars
1176
1176
  # # │ 1.0 │
1177
1177
  # # │ 1.2 │
1178
1178
  # # └─────┘
1179
- def round(decimals = 0)
1180
- _from_rbexpr(_rbexpr.round(decimals))
1179
+ def round(decimals = 0, mode: "half_to_even")
1180
+ _from_rbexpr(_rbexpr.round(decimals, mode))
1181
1181
  end
1182
1182
 
1183
1183
  # Compute the dot/inner product between two Expressions.
@@ -1867,7 +1867,7 @@ module Polars
1867
1867
  # # │ 2 ┆ 6 │
1868
1868
  # # └─────┴─────┘
1869
1869
  def forward_fill(limit: nil)
1870
- _from_rbexpr(_rbexpr.forward_fill(limit))
1870
+ fill_null(strategy: "forward", limit: limit)
1871
1871
  end
1872
1872
 
1873
1873
  # Fill missing values with the next to be seen values.
@@ -1897,7 +1897,7 @@ module Polars
1897
1897
  # # │ null ┆ 6 │
1898
1898
  # # └──────┴─────┘
1899
1899
  def backward_fill(limit: nil)
1900
- _from_rbexpr(_rbexpr.backward_fill(limit))
1900
+ fill_null(strategy: "backward", limit: limit)
1901
1901
  end
1902
1902
 
1903
1903
  # Reverse the selection.
@@ -3712,6 +3712,8 @@ module Polars
3712
3712
  #
3713
3713
  # @param other [Object]
3714
3714
  # Series or sequence of primitive type.
3715
+ # @param nulls_equal [Boolean]
3716
+ # If true, treat null as a distinct value. Null values will not propagate.
3715
3717
  #
3716
3718
  # @return [Expr]
3717
3719
  #
@@ -3719,29 +3721,21 @@ module Polars
3719
3721
  # df = Polars::DataFrame.new(
3720
3722
  # {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
3721
3723
  # )
3722
- # df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
3724
+ # df.with_columns(contains: Polars.col("optional_members").is_in("sets"))
3723
3725
  # # =>
3724
- # # shape: (3, 1)
3725
- # # ┌──────────┐
3726
- # # │ contains │
3727
- # # │ --- │
3728
- # # │ bool │
3729
- # # ╞══════════╡
3730
- # # │ true │
3731
- # # │ true │
3732
- # # │ false │
3733
- # # └──────────┘
3734
- def is_in(other)
3735
- if other.is_a?(::Array)
3736
- if other.length == 0
3737
- other = Polars.lit(nil)._rbexpr
3738
- else
3739
- other = Polars.lit(Series.new(other))._rbexpr
3740
- end
3741
- else
3742
- other = Utils.parse_into_expression(other, str_as_lit: false)
3743
- end
3744
- _from_rbexpr(_rbexpr.is_in(other))
3726
+ # # shape: (3, 3)
3727
+ # # ┌───────────┬──────────────────┬──────────┐
3728
+ # # │ sets ┆ optional_members ┆ contains │
3729
+ # # │ --- ┆ --- ┆ --- │
3730
+ # # │ list[i64] ┆ i64 ┆ bool │
3731
+ # # ╞═══════════╪══════════════════╪══════════╡
3732
+ # # │ [1, 2, 3] ┆ 1 ┆ true │
3733
+ # # │ [1, 2] ┆ 2 ┆ true │
3734
+ # # │ [9, 10] ┆ 3 ┆ false │
3735
+ # # └───────────┴──────────────────┴──────────┘
3736
+ def is_in(other, nulls_equal: false)
3737
+ other = Utils.parse_into_expression(other)
3738
+ _from_rbexpr(_rbexpr.is_in(other, nulls_equal))
3745
3739
  end
3746
3740
  alias_method :in?, :is_in
3747
3741
 
@@ -5715,6 +5709,11 @@ module Polars
5715
5709
  # Integer size of the rolling window.
5716
5710
  # @param bias [Boolean]
5717
5711
  # If false, the calculations are corrected for statistical bias.
5712
+ # @param min_samples [Integer]
5713
+ # The number of values in the window that should be non-null before computing
5714
+ # a result. If set to `nil` (default), it will be set equal to `window_size`.
5715
+ # @param center [Boolean]
5716
+ # Set the labels at the center of the window.
5718
5717
  #
5719
5718
  # @return [Expr]
5720
5719
  #
@@ -5733,8 +5732,8 @@ module Polars
5733
5732
  # # │ 0.381802 │
5734
5733
  # # │ 0.47033 │
5735
5734
  # # └──────────┘
5736
- def rolling_skew(window_size, bias: true)
5737
- _from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
5735
+ def rolling_skew(window_size, bias: true, min_samples: nil, center: false)
5736
+ _from_rbexpr(_rbexpr.rolling_skew(window_size, bias, min_samples, center))
5738
5737
  end
5739
5738
 
5740
5739
  # Compute absolute values.
@@ -5889,6 +5888,7 @@ module Polars
5889
5888
  # # │ 20 │
5890
5889
  # # └──────┘
5891
5890
  def diff(n: 1, null_behavior: "ignore")
5891
+ n = Utils.parse_into_expression(n)
5892
5892
  _from_rbexpr(_rbexpr.diff(n, null_behavior))
5893
5893
  end
5894
5894
 
@@ -16,20 +16,15 @@ module Polars
16
16
  elsif value.is_a?(::Date)
17
17
  return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
18
18
  elsif value.is_a?(Polars::Series)
19
- name = value.name
20
19
  value = value._s
21
- e = Utils.wrap_expr(Plr.lit(value, allow_object))
22
- if name == ""
23
- return e
24
- end
25
- return e.alias(name)
20
+ return Utils.wrap_expr(Plr.lit(value, allow_object, false))
26
21
  elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
27
- return lit(Series.new("", value))
22
+ return Utils.wrap_expr(Plr.lit(Series.new("literal", [value.to_a], dtype: dtype)._s, allow_object, true))
28
23
  elsif dtype
29
- return Utils.wrap_expr(Plr.lit(value, allow_object)).cast(dtype)
24
+ return Utils.wrap_expr(Plr.lit(value, allow_object, true)).cast(dtype)
30
25
  end
31
26
 
32
- Utils.wrap_expr(Plr.lit(value, allow_object))
27
+ Utils.wrap_expr(Plr.lit(value, allow_object, true))
33
28
  end
34
29
  end
35
30
  end
@@ -51,7 +51,7 @@ module Polars
51
51
  when :decimal
52
52
  Decimal
53
53
  when :float
54
- # TODO uncomment in 0.18.0
54
+ # TODO uncomment in future release
55
55
  # if column_type.limit && column_type.limit <= 24
56
56
  # Float32
57
57
  # else
@@ -59,7 +59,7 @@ module Polars
59
59
  # end
60
60
  Float64
61
61
  when :integer
62
- # TODO uncomment in 0.18.0
62
+ # TODO uncomment in future release
63
63
  # case column_type.limit
64
64
  # when 1
65
65
  # Int8
@@ -433,7 +433,10 @@ module Polars
433
433
  no_optimization: false,
434
434
  slice_pushdown: true,
435
435
  storage_options: nil,
436
- retries: 2
436
+ retries: 2,
437
+ sync_on_close: nil,
438
+ mkdir: false,
439
+ lazy: false
437
440
  )
438
441
  lf = _set_sink_optimizations(
439
442
  type_coercion: type_coercion,
@@ -468,17 +471,30 @@ module Polars
468
471
  storage_options = nil
469
472
  end
470
473
 
471
- lf.sink_parquet(
474
+ sink_options = {
475
+ "sync_on_close" => sync_on_close || "none",
476
+ "maintain_order" => maintain_order,
477
+ "mkdir" => mkdir
478
+ }
479
+
480
+ lf = lf.sink_parquet(
472
481
  path,
473
482
  compression,
474
483
  compression_level,
475
484
  statistics,
476
485
  row_group_size,
477
486
  data_pagesize_limit,
478
- maintain_order,
479
487
  storage_options,
480
- retries
488
+ retries,
489
+ sink_options
481
490
  )
491
+ lf = LazyFrame._from_rbldf(lf)
492
+
493
+ if !lazy
494
+ lf.collect
495
+ return nil
496
+ end
497
+ lf
482
498
  end
483
499
 
484
500
  # Evaluate the query in streaming mode and write to an IPC file.
@@ -520,7 +536,10 @@ module Polars
520
536
  projection_pushdown: true,
521
537
  simplify_expression: true,
522
538
  slice_pushdown: true,
523
- no_optimization: false
539
+ no_optimization: false,
540
+ sync_on_close: nil,
541
+ mkdir: false,
542
+ lazy: false
524
543
  )
525
544
  # TODO support storage options in Rust
526
545
  storage_options = nil
@@ -541,13 +560,26 @@ module Polars
541
560
  storage_options = nil
542
561
  end
543
562
 
544
- lf.sink_ipc(
563
+ sink_options = {
564
+ "sync_on_close" => sync_on_close || "none",
565
+ "maintain_order" => maintain_order,
566
+ "mkdir" => mkdir
567
+ }
568
+
569
+ lf = lf.sink_ipc(
545
570
  path,
546
571
  compression,
547
- maintain_order,
548
572
  storage_options,
549
- retries
573
+ retries,
574
+ sink_options
550
575
  )
576
+ lf = LazyFrame._from_rbldf(lf)
577
+
578
+ if !lazy
579
+ lf.collect
580
+ return nil
581
+ end
582
+ lf
551
583
  end
552
584
 
553
585
  # Evaluate the query in streaming mode and write to a CSV file.
@@ -652,7 +684,10 @@ module Polars
652
684
  slice_pushdown: true,
653
685
  no_optimization: false,
654
686
  storage_options: nil,
655
- retries: 2
687
+ retries: 2,
688
+ sync_on_close: nil,
689
+ mkdir: false,
690
+ lazy: false
656
691
  )
657
692
  Utils._check_arg_is_1byte("separator", separator, false)
658
693
  Utils._check_arg_is_1byte("quote_char", quote_char, false)
@@ -672,7 +707,13 @@ module Polars
672
707
  storage_options = nil
673
708
  end
674
709
 
675
- lf.sink_csv(
710
+ sink_options = {
711
+ "sync_on_close" => sync_on_close || "none",
712
+ "maintain_order" => maintain_order,
713
+ "mkdir" => mkdir
714
+ }
715
+
716
+ lf = lf.sink_csv(
676
717
  path,
677
718
  include_bom,
678
719
  include_header,
@@ -687,10 +728,17 @@ module Polars
687
728
  float_precision,
688
729
  null_value,
689
730
  quote_style,
690
- maintain_order,
691
731
  storage_options,
692
- retries
732
+ retries,
733
+ sink_options
693
734
  )
735
+ lf = LazyFrame._from_rbldf(lf)
736
+
737
+ if !lazy
738
+ lf.collect
739
+ return nil
740
+ end
741
+ lf
694
742
  end
695
743
 
696
744
  # Evaluate the query in streaming mode and write to an NDJSON file.
@@ -730,7 +778,10 @@ module Polars
730
778
  slice_pushdown: true,
731
779
  no_optimization: false,
732
780
  storage_options: nil,
733
- retries: 2
781
+ retries: 2,
782
+ sync_on_close: nil,
783
+ mkdir: false,
784
+ lazy: false
734
785
  )
735
786
  lf = _set_sink_optimizations(
736
787
  type_coercion: type_coercion,
@@ -747,7 +798,20 @@ module Polars
747
798
  storage_options = nil
748
799
  end
749
800
 
750
- lf.sink_json(path, maintain_order, storage_options, retries)
801
+ sink_options = {
802
+ "sync_on_close" => sync_on_close || "none",
803
+ "maintain_order" => maintain_order,
804
+ "mkdir" => mkdir
805
+ }
806
+
807
+ lf = lf.sink_json(path, storage_options, retries, sink_options)
808
+ lf = LazyFrame._from_rbldf(lf)
809
+
810
+ if !lazy
811
+ lf.collect
812
+ return nil
813
+ end
814
+ lf
751
815
  end
752
816
 
753
817
  # @private
@@ -403,7 +403,7 @@ module Polars
403
403
  # The indices may be defined in a single column, or by sublists in another
404
404
  # column of dtype `List`.
405
405
  #
406
- # @param index [Object]
406
+ # @param indices [Object]
407
407
  # Indices to return per sublist
408
408
  # @param null_on_oob [Boolean]
409
409
  # Behavior if an index is out of bounds:
@@ -427,12 +427,9 @@ module Polars
427
427
  # # │ [] ┆ [null, null] │
428
428
  # # │ [1, 2, … 5] ┆ [1, 5] │
429
429
  # # └─────────────┴──────────────┘
430
- def gather(index, null_on_oob: false)
431
- if index.is_a?(::Array)
432
- index = Series.new(index)
433
- end
434
- index = Utils.parse_into_expression(index, str_as_lit: false)
435
- Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
430
+ def gather(indices, null_on_oob: false)
431
+ indices = Utils.parse_into_expression(indices)
432
+ Utils.wrap_expr(_rbexpr.list_gather(indices, null_on_oob))
436
433
  end
437
434
  alias_method :take, :gather
438
435
 
@@ -484,6 +481,8 @@ module Polars
484
481
  #
485
482
  # @param item [Object]
486
483
  # Item that will be checked for membership
484
+ # @param nulls_equal [Boolean]
485
+ # If true, treat null as a distinct value. Null values will not propagate.
487
486
  #
488
487
  # @return [Expr]
489
488
  #
@@ -501,8 +500,8 @@ module Polars
501
500
  # # │ false │
502
501
  # # │ true │
503
502
  # # └───────┘
504
- def contains(item)
505
- Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
503
+ def contains(item, nulls_equal: true)
504
+ Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item), nulls_equal))
506
505
  end
507
506
 
508
507
  # Join all string items in a sublist and place a separator between them.
@@ -749,9 +748,9 @@ module Polars
749
748
  # # │ {1,2,3} │
750
749
  # # │ {1,2,null} │
751
750
  # # └────────────┘
752
- def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
751
+ def to_struct(n_field_strategy: "first_non_null", name_generator: nil, upper_bound: nil)
753
752
  raise Todo if name_generator
754
- Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, 0))
753
+ Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, nil))
755
754
  end
756
755
 
757
756
  # Run any polars expression against the lists' elements.
data/lib/polars/series.rb CHANGED
@@ -2144,18 +2144,33 @@ module Polars
2144
2144
 
2145
2145
  # Check if elements of this Series are in the other Series.
2146
2146
  #
2147
+ # @param nulls_equal [Boolean]
2148
+ # If true, treat null as a distinct value. Null values will not propagate.
2149
+ #
2147
2150
  # @return [Series]
2148
2151
  #
2149
2152
  # @example
2150
2153
  # s = Polars::Series.new("a", [1, 2, 3])
2151
- # s2 = Polars::Series.new("b", [2, 4])
2154
+ # s2 = Polars::Series.new("b", [2, 4, nil])
2152
2155
  # s2.is_in(s)
2153
2156
  # # =>
2154
- # # shape: (2,)
2157
+ # # shape: (3,)
2158
+ # # Series: 'b' [bool]
2159
+ # # [
2160
+ # # true
2161
+ # # false
2162
+ # # null
2163
+ # # ]
2164
+ #
2165
+ # @example
2166
+ # s2.is_in(s, nulls_equal: true)
2167
+ # # =>
2168
+ # # shape: (3,)
2155
2169
  # # Series: 'b' [bool]
2156
2170
  # # [
2157
2171
  # # true
2158
2172
  # # false
2173
+ # # false
2159
2174
  # # ]
2160
2175
  #
2161
2176
  # @example
@@ -2190,7 +2205,7 @@ module Polars
2190
2205
  # # true
2191
2206
  # # false
2192
2207
  # # ]
2193
- def is_in(other)
2208
+ def is_in(other, nulls_equal: false)
2194
2209
  super
2195
2210
  end
2196
2211
  alias_method :in?, :is_in
@@ -3577,24 +3592,26 @@ module Polars
3577
3592
  # Integer size of the rolling window.
3578
3593
  # @param bias [Boolean]
3579
3594
  # If false, the calculations are corrected for statistical bias.
3595
+ # @param min_samples [Integer]
3596
+ # The number of values in the window that should be non-null before computing
3597
+ # a result. If set to `nil` (default), it will be set equal to `window_size`.
3598
+ # @param center [Boolean]
3599
+ # Set the labels at the center of the window.
3580
3600
  #
3581
3601
  # @return [Series]
3582
3602
  #
3583
3603
  # @example
3584
- # s = Polars::Series.new("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0])
3585
- # s.rolling_skew(3)
3604
+ # Polars::Series.new([1, 4, 2, 9]).rolling_skew(3)
3586
3605
  # # =>
3587
- # # shape: (6,)
3588
- # # Series: 'a' [f64]
3606
+ # # shape: (4,)
3607
+ # # Series: '' [f64]
3589
3608
  # # [
3590
3609
  # # null
3591
3610
  # # null
3592
- # # 0.0
3593
- # # 0.0
3594
3611
  # # 0.381802
3595
- # # 0.0
3612
+ # # 0.47033
3596
3613
  # # ]
3597
- def rolling_skew(window_size, bias: true)
3614
+ def rolling_skew(window_size, bias: true, min_samples: nil, center: false)
3598
3615
  super
3599
3616
  end
3600
3617
 
@@ -4043,7 +4060,7 @@ module Polars
4043
4060
  #
4044
4061
  # @example
4045
4062
  # s.kurtosis(fisher: false, bias: false)
4046
- # # => 2.1040361802642726
4063
+ # # => 2.1040361802642717
4047
4064
  def kurtosis(fisher: true, bias: true)
4048
4065
  _s.kurtosis(fisher, bias)
4049
4066
  end
@@ -1403,7 +1403,7 @@ module Polars
1403
1403
  # # │ Can you feel the love tonight ┆ true │
1404
1404
  # # └─────────────────────────────────┴──────────────┘
1405
1405
  def contains_any(patterns, ascii_case_insensitive: false)
1406
- patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
1406
+ patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
1407
1407
  Utils.wrap_expr(
1408
1408
  _rbexpr.str_contains_any(patterns, ascii_case_insensitive)
1409
1409
  )
@@ -1474,9 +1474,9 @@ module Polars
1474
1474
  # # │ Can you feel the love tonight ┆ Can me feel the love tonight │
1475
1475
  # # └─────────────────────────────────┴─────────────────────────────────┘
1476
1476
  def replace_many(patterns, replace_with, ascii_case_insensitive: false)
1477
- patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
1477
+ patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
1478
1478
  replace_with = Utils.parse_into_expression(
1479
- replace_with, str_as_lit: true, list_as_series: true
1479
+ replace_with, str_as_lit: true
1480
1480
  )
1481
1481
  Utils.wrap_expr(
1482
1482
  _rbexpr.str_replace_many(
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.17.1"
3
+ VERSION = "0.19.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.1
4
+ version: 0.19.0
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-04-13 00:00:00.000000000 Z
11
+ date: 2025-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal