polars-df 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +185 -325
- data/ext/polars/Cargo.toml +7 -7
- data/ext/polars/src/conversion/any_value.rs +1 -1
- data/ext/polars/src/conversion/mod.rs +18 -2
- data/ext/polars/src/expr/array.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +16 -9
- data/ext/polars/src/expr/general.rs +14 -27
- data/ext/polars/src/expr/list.rs +13 -22
- data/ext/polars/src/expr/meta.rs +18 -7
- data/ext/polars/src/expr/string.rs +2 -3
- data/ext/polars/src/file.rs +1 -1
- data/ext/polars/src/functions/lazy.rs +11 -2
- data/ext/polars/src/functions/range.rs +18 -14
- data/ext/polars/src/lazyframe/general.rs +3 -1
- data/ext/polars/src/lib.rs +9 -9
- data/lib/polars/array_expr.rb +4 -2
- data/lib/polars/expr.rb +15 -17
- data/lib/polars/functions/eager.rb +1 -1
- data/lib/polars/functions/lit.rb +4 -9
- data/lib/polars/list_expr.rb +15 -14
- data/lib/polars/list_name_space.rb +1 -7
- data/lib/polars/meta_expr.rb +7 -7
- data/lib/polars/series.rb +30 -11
- data/lib/polars/string_expr.rb +3 -3
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/version.rb +1 -1
- metadata +1 -1
data/lib/polars/expr.rb
CHANGED
@@ -1512,6 +1512,13 @@ module Polars
|
|
1512
1512
|
#
|
1513
1513
|
# @param element [Object]
|
1514
1514
|
# Expression or scalar value.
|
1515
|
+
# @param side ['any', 'left', 'right']
|
1516
|
+
# If 'any', the index of the first suitable location found is given.
|
1517
|
+
# If 'left', the index of the leftmost suitable location found is given.
|
1518
|
+
# If 'right', return the rightmost suitable location found is given.
|
1519
|
+
# @param descending [Boolean]
|
1520
|
+
# Boolean indicating whether the values are descending or not (they
|
1521
|
+
# are required to be sorted either way).
|
1515
1522
|
#
|
1516
1523
|
# @return [Expr]
|
1517
1524
|
#
|
@@ -1537,9 +1544,9 @@ module Polars
|
|
1537
1544
|
# # ╞══════╪═══════╪═════╡
|
1538
1545
|
# # │ 0 ┆ 2 ┆ 4 │
|
1539
1546
|
# # └──────┴───────┴─────┘
|
1540
|
-
def search_sorted(element, side: "any")
|
1547
|
+
def search_sorted(element, side: "any", descending: false)
|
1541
1548
|
element = Utils.parse_into_expression(element, str_as_lit: false)
|
1542
|
-
_from_rbexpr(_rbexpr.search_sorted(element, side))
|
1549
|
+
_from_rbexpr(_rbexpr.search_sorted(element, side, descending))
|
1543
1550
|
end
|
1544
1551
|
|
1545
1552
|
# Sort this column by the ordering of another column, or multiple other columns.
|
@@ -3734,15 +3741,7 @@ module Polars
|
|
3734
3741
|
# # │ [9, 10] ┆ 3 ┆ false │
|
3735
3742
|
# # └───────────┴──────────────────┴──────────┘
|
3736
3743
|
def is_in(other, nulls_equal: false)
|
3737
|
-
|
3738
|
-
if other.length == 0
|
3739
|
-
other = Polars.lit(nil)._rbexpr
|
3740
|
-
else
|
3741
|
-
other = Polars.lit(Series.new(other))._rbexpr
|
3742
|
-
end
|
3743
|
-
else
|
3744
|
-
other = Utils.parse_into_expression(other, str_as_lit: false)
|
3745
|
-
end
|
3744
|
+
other = Utils.parse_into_expression(other)
|
3746
3745
|
_from_rbexpr(_rbexpr.is_in(other, nulls_equal))
|
3747
3746
|
end
|
3748
3747
|
alias_method :in?, :is_in
|
@@ -6659,6 +6658,8 @@ module Polars
|
|
6659
6658
|
# # │ 99 │
|
6660
6659
|
# # └────────┘
|
6661
6660
|
def extend_constant(value, n)
|
6661
|
+
value = Utils.parse_into_expression(value, str_as_lit: true)
|
6662
|
+
n = Utils.parse_into_expression(n)
|
6662
6663
|
_from_rbexpr(_rbexpr.extend_constant(value, n))
|
6663
6664
|
end
|
6664
6665
|
|
@@ -6822,9 +6823,6 @@ module Polars
|
|
6822
6823
|
# @param min_periods [Integer]
|
6823
6824
|
# Number of valid values there should be in the window before the expression
|
6824
6825
|
# is evaluated. valid values = `length - null_count`
|
6825
|
-
# @param parallel [Boolean]
|
6826
|
-
# Run in parallel. Don't do this in a group by or another operation that
|
6827
|
-
# already has much parallelization.
|
6828
6826
|
#
|
6829
6827
|
# @return [Expr]
|
6830
6828
|
#
|
@@ -6858,9 +6856,9 @@ module Polars
|
|
6858
6856
|
# # │ -15 │
|
6859
6857
|
# # │ -24 │
|
6860
6858
|
# # └────────┘
|
6861
|
-
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
6859
|
+
def cumulative_eval(expr, min_periods: 1)
|
6862
6860
|
_from_rbexpr(
|
6863
|
-
_rbexpr.cumulative_eval(expr._rbexpr, min_periods, parallel)
|
6861
|
+
_rbexpr.cumulative_eval(expr._rbexpr, min_periods)
|
6864
6862
|
)
|
6865
6863
|
end
|
6866
6864
|
|
@@ -7125,7 +7123,7 @@ module Polars
|
|
7125
7123
|
# Accepts expression input. Sequences are parsed as Series,
|
7126
7124
|
# other non-expression inputs are parsed as literals.
|
7127
7125
|
# Also accepts a mapping of values to their replacement as syntactic sugar for
|
7128
|
-
# `replace_all(old: Series.new(mapping.keys), new:
|
7126
|
+
# `replace_all(old: Series.new(mapping.keys), new: Series.new(mapping.values))`.
|
7129
7127
|
# @param new [Object]
|
7130
7128
|
# Value or sequence of values to replace by.
|
7131
7129
|
# Accepts expression input. Sequences are parsed as Series,
|
data/lib/polars/functions/eager.rb
CHANGED
@@ -206,7 +206,7 @@ module Polars
|
|
206
206
|
end
|
207
207
|
end
|
208
208
|
|
209
|
-
# Align a sequence of frames using the
|
209
|
+
# Align a sequence of frames using the unique values from one or more columns as a key.
|
210
210
|
#
|
211
211
|
# Frames that do not contain the given key values have rows injected (with nulls
|
212
212
|
# filling the non-key columns), and each resulting frame is sorted by the key.
|
data/lib/polars/functions/lit.rb
CHANGED
@@ -16,20 +16,15 @@ module Polars
|
|
16
16
|
elsif value.is_a?(::Date)
|
17
17
|
return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
|
18
18
|
elsif value.is_a?(Polars::Series)
|
19
|
-
name = value.name
|
20
19
|
value = value._s
|
21
|
-
|
22
|
-
if name == ""
|
23
|
-
return e
|
24
|
-
end
|
25
|
-
return e.alias(name)
|
20
|
+
return Utils.wrap_expr(Plr.lit(value, allow_object, false))
|
26
21
|
elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
|
27
|
-
return lit(Series.new("", value))
|
22
|
+
return Utils.wrap_expr(Plr.lit(Series.new("literal", [value.to_a], dtype: dtype)._s, allow_object, true))
|
28
23
|
elsif dtype
|
29
|
-
return Utils.wrap_expr(Plr.lit(value, allow_object)).cast(dtype)
|
24
|
+
return Utils.wrap_expr(Plr.lit(value, allow_object, true)).cast(dtype)
|
30
25
|
end
|
31
26
|
|
32
|
-
Utils.wrap_expr(Plr.lit(value, allow_object))
|
27
|
+
Utils.wrap_expr(Plr.lit(value, allow_object, true))
|
33
28
|
end
|
34
29
|
end
|
35
30
|
end
|
data/lib/polars/list_expr.rb
CHANGED
@@ -245,6 +245,11 @@ module Polars
|
|
245
245
|
|
246
246
|
# Sort the arrays in the list.
|
247
247
|
#
|
248
|
+
# @param reverse [Boolean]
|
249
|
+
# Sort in descending order.
|
250
|
+
# @param nulls_last [Boolean]
|
251
|
+
# Place null values last.
|
252
|
+
#
|
248
253
|
# @return [Expr]
|
249
254
|
#
|
250
255
|
# @example
|
@@ -264,8 +269,8 @@ module Polars
|
|
264
269
|
# # │ [1, 2, 3] │
|
265
270
|
# # │ [1, 2, 9] │
|
266
271
|
# # └───────────┘
|
267
|
-
def sort(reverse: false)
|
268
|
-
Utils.wrap_expr(_rbexpr.list_sort(reverse))
|
272
|
+
def sort(reverse: false, nulls_last: false)
|
273
|
+
Utils.wrap_expr(_rbexpr.list_sort(reverse, nulls_last))
|
269
274
|
end
|
270
275
|
|
271
276
|
# Reverse the arrays in the list.
|
@@ -481,6 +486,8 @@ module Polars
|
|
481
486
|
#
|
482
487
|
# @param item [Object]
|
483
488
|
# Item that will be checked for membership
|
489
|
+
# @param nulls_equal [Boolean]
|
490
|
+
# If true, treat null as a distinct value. Null values will not propagate.
|
484
491
|
#
|
485
492
|
# @return [Expr]
|
486
493
|
#
|
@@ -498,8 +505,8 @@ module Polars
|
|
498
505
|
# # │ false │
|
499
506
|
# # │ true │
|
500
507
|
# # └───────┘
|
501
|
-
def contains(item)
|
502
|
-
Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
|
508
|
+
def contains(item, nulls_equal: true)
|
509
|
+
Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item), nulls_equal))
|
503
510
|
end
|
504
511
|
|
505
512
|
# Join all string items in a sublist and place a separator between them.
|
@@ -746,9 +753,9 @@ module Polars
|
|
746
753
|
# # │ {1,2,3} │
|
747
754
|
# # │ {1,2,null} │
|
748
755
|
# # └────────────┘
|
749
|
-
def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
|
756
|
+
def to_struct(n_field_strategy: "first_non_null", name_generator: nil, upper_bound: nil)
|
750
757
|
raise Todo if name_generator
|
751
|
-
Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator,
|
758
|
+
Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, nil))
|
752
759
|
end
|
753
760
|
|
754
761
|
# Run any polars expression against the lists' elements.
|
@@ -756,12 +763,6 @@ module Polars
|
|
756
763
|
# @param expr [Expr]
|
757
764
|
# Expression to run. Note that you can select an element with `Polars.first`, or
|
758
765
|
# `Polars.col`
|
759
|
-
# @param parallel [Boolean]
|
760
|
-
# Run all expression parallel. Don't activate this blindly.
|
761
|
-
# Parallelism is worth it if there is enough work to do per thread.
|
762
|
-
#
|
763
|
-
# This likely should not be use in the group by context, because we already
|
764
|
-
# parallel execution per group
|
765
766
|
#
|
766
767
|
# @return [Expr]
|
767
768
|
#
|
@@ -781,8 +782,8 @@ module Polars
|
|
781
782
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
782
783
|
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
783
784
|
# # └─────┴─────┴────────────┘
|
784
|
-
def eval(expr, parallel: false)
|
785
|
-
Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr, parallel))
|
785
|
+
def eval(expr)
|
786
|
+
Utils.wrap_expr(_rbexpr.list_eval(expr._rbexpr))
|
786
787
|
end
|
787
788
|
end
|
788
789
|
end
|
data/lib/polars/list_name_space.rb
CHANGED
@@ -586,12 +586,6 @@ module Polars
|
|
586
586
|
# @param expr [Expr]
|
587
587
|
# Expression to run. Note that you can select an element with `Polars.first`, or
|
588
588
|
# `Polars.col`
|
589
|
-
# @param parallel [Boolean]
|
590
|
-
# Run all expression parallel. Don't activate this blindly.
|
591
|
-
# Parallelism is worth it if there is enough work to do per thread.
|
592
|
-
#
|
593
|
-
# This likely should not be use in the group by context, because we already
|
594
|
-
# parallel execution per group
|
595
589
|
#
|
596
590
|
# @return [Series]
|
597
591
|
#
|
@@ -611,7 +605,7 @@ module Polars
|
|
611
605
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
612
606
|
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
613
607
|
# # └─────┴─────┴────────────┘
|
614
|
-
def eval(expr, parallel: false)
|
608
|
+
def eval(expr)
|
615
609
|
super
|
616
610
|
end
|
617
611
|
end
|
data/lib/polars/meta_expr.rb
CHANGED
@@ -125,14 +125,14 @@ module Polars
|
|
125
125
|
# @return [Array]
|
126
126
|
#
|
127
127
|
# @example
|
128
|
-
# e = Polars.col("foo").alias("bar")
|
128
|
+
# e = Polars.col("foo") + Polars.col("bar")
|
129
129
|
# first = e.meta.pop[0]
|
130
|
-
# _ = first.meta == Polars.col("foo")
|
131
|
-
# # => true
|
132
130
|
# _ = first.meta == Polars.col("bar")
|
131
|
+
# # => true
|
132
|
+
# _ = first.meta == Polars.col("foo")
|
133
133
|
# # => false
|
134
|
-
def pop
|
135
|
-
_rbexpr.meta_pop.map { |e| Utils.wrap_expr(e) }
|
134
|
+
def pop(schema: nil)
|
135
|
+
_rbexpr.meta_pop(schema).map { |e| Utils.wrap_expr(e) }
|
136
136
|
end
|
137
137
|
|
138
138
|
# Get a list with the root column name.
|
@@ -209,8 +209,8 @@ module Polars
|
|
209
209
|
# @example
|
210
210
|
# e = (Polars.col("foo") * Polars.col("bar")).sum.over(Polars.col("ham")) / 2
|
211
211
|
# e.meta.tree_format(return_as_string: true)
|
212
|
-
def tree_format(return_as_string: false)
|
213
|
-
s = _rbexpr.meta_tree_format
|
212
|
+
def tree_format(return_as_string: false, schema: nil)
|
213
|
+
s = _rbexpr.meta_tree_format(schema)
|
214
214
|
if return_as_string
|
215
215
|
s
|
216
216
|
else
|
data/lib/polars/series.rb
CHANGED
@@ -1321,9 +1321,6 @@ module Polars
|
|
1321
1321
|
# @param min_periods [Integer]
|
1322
1322
|
# Number of valid values there should be in the window before the expression
|
1323
1323
|
# is evaluated. valid values = `length - null_count`
|
1324
|
-
# @param parallel [Boolean]
|
1325
|
-
# Run in parallel. Don't do this in a group by or another operation that
|
1326
|
-
# already has much parallelization.
|
1327
1324
|
#
|
1328
1325
|
# @return [Series]
|
1329
1326
|
#
|
@@ -1348,7 +1345,7 @@ module Polars
|
|
1348
1345
|
# # -15
|
1349
1346
|
# # -24
|
1350
1347
|
# # ]
|
1351
|
-
def cumulative_eval(expr, min_periods: 1, parallel: false)
|
1348
|
+
def cumulative_eval(expr, min_periods: 1)
|
1352
1349
|
super
|
1353
1350
|
end
|
1354
1351
|
|
@@ -1879,6 +1876,13 @@ module Polars
|
|
1879
1876
|
#
|
1880
1877
|
# @param element [Object]
|
1881
1878
|
# Expression or scalar value.
|
1879
|
+
# @param side ['any', 'left', 'right']
|
1880
|
+
# If 'any', the index of the first suitable location found is given.
|
1881
|
+
# If 'left', the index of the leftmost suitable location found is given.
|
1882
|
+
# If 'right', return the rightmost suitable location found is given.
|
1883
|
+
# @param descending [Boolean]
|
1884
|
+
# Boolean indicating whether the values are descending or not (they
|
1885
|
+
# are required to be sorted either way).
|
1882
1886
|
#
|
1883
1887
|
# @return [Integer]
|
1884
1888
|
#
|
@@ -1927,12 +1931,12 @@ module Polars
|
|
1927
1931
|
# # 5
|
1928
1932
|
# # 6
|
1929
1933
|
# # ]
|
1930
|
-
def search_sorted(element, side: "any")
|
1934
|
+
def search_sorted(element, side: "any", descending: false)
|
1931
1935
|
if element.is_a?(Integer) || element.is_a?(Float)
|
1932
|
-
return Polars.select(Polars.lit(self).search_sorted(element, side: side)).item
|
1936
|
+
return Polars.select(Polars.lit(self).search_sorted(element, side: side, descending: descending)).item
|
1933
1937
|
end
|
1934
1938
|
element = Series.new(element)
|
1935
|
-
Polars.select(Polars.lit(self).search_sorted(element, side: side)).to_series
|
1939
|
+
Polars.select(Polars.lit(self).search_sorted(element, side: side, descending: descending)).to_series
|
1936
1940
|
end
|
1937
1941
|
|
1938
1942
|
# Get unique elements in series.
|
@@ -2144,18 +2148,33 @@ module Polars
|
|
2144
2148
|
|
2145
2149
|
# Check if elements of this Series are in the other Series.
|
2146
2150
|
#
|
2151
|
+
# @param nulls_equal [Boolean]
|
2152
|
+
# If true, treat null as a distinct value. Null values will not propagate.
|
2153
|
+
#
|
2147
2154
|
# @return [Series]
|
2148
2155
|
#
|
2149
2156
|
# @example
|
2150
2157
|
# s = Polars::Series.new("a", [1, 2, 3])
|
2151
|
-
# s2 = Polars::Series.new("b", [2, 4])
|
2158
|
+
# s2 = Polars::Series.new("b", [2, 4, nil])
|
2152
2159
|
# s2.is_in(s)
|
2153
2160
|
# # =>
|
2154
|
-
# # shape: (2,)
|
2161
|
+
# # shape: (3,)
|
2155
2162
|
# # Series: 'b' [bool]
|
2156
2163
|
# # [
|
2157
2164
|
# # true
|
2158
2165
|
# # false
|
2166
|
+
# # null
|
2167
|
+
# # ]
|
2168
|
+
#
|
2169
|
+
# @example
|
2170
|
+
# s2.is_in(s, nulls_equal: true)
|
2171
|
+
# # =>
|
2172
|
+
# # shape: (3,)
|
2173
|
+
# # Series: 'b' [bool]
|
2174
|
+
# # [
|
2175
|
+
# # true
|
2176
|
+
# # false
|
2177
|
+
# # false
|
2159
2178
|
# # ]
|
2160
2179
|
#
|
2161
2180
|
# @example
|
@@ -2190,7 +2209,7 @@ module Polars
|
|
2190
2209
|
# # true
|
2191
2210
|
# # false
|
2192
2211
|
# # ]
|
2193
|
-
def is_in(other)
|
2212
|
+
def is_in(other, nulls_equal: false)
|
2194
2213
|
super
|
2195
2214
|
end
|
2196
2215
|
alias_method :in?, :is_in
|
@@ -4351,7 +4370,7 @@ module Polars
|
|
4351
4370
|
# # 99
|
4352
4371
|
# # ]
|
4353
4372
|
def extend_constant(value, n)
|
4354
|
-
|
4373
|
+
super
|
4355
4374
|
end
|
4356
4375
|
|
4357
4376
|
# Flags the Series as sorted.
|
data/lib/polars/string_expr.rb
CHANGED
@@ -1403,7 +1403,7 @@ module Polars
|
|
1403
1403
|
# # │ Can you feel the love tonight ┆ true │
|
1404
1404
|
# # └─────────────────────────────────┴──────────────┘
|
1405
1405
|
def contains_any(patterns, ascii_case_insensitive: false)
|
1406
|
-
patterns = Utils.parse_into_expression(patterns, str_as_lit: false
|
1406
|
+
patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
|
1407
1407
|
Utils.wrap_expr(
|
1408
1408
|
_rbexpr.str_contains_any(patterns, ascii_case_insensitive)
|
1409
1409
|
)
|
@@ -1474,9 +1474,9 @@ module Polars
|
|
1474
1474
|
# # │ Can you feel the love tonight ┆ Can me feel the love tonight │
|
1475
1475
|
# # └─────────────────────────────────┴─────────────────────────────────┘
|
1476
1476
|
def replace_many(patterns, replace_with, ascii_case_insensitive: false)
|
1477
|
-
patterns = Utils.parse_into_expression(patterns, str_as_lit: false
|
1477
|
+
patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
|
1478
1478
|
replace_with = Utils.parse_into_expression(
|
1479
|
-
replace_with, str_as_lit: true
|
1479
|
+
replace_with, str_as_lit: true
|
1480
1480
|
)
|
1481
1481
|
Utils.wrap_expr(
|
1482
1482
|
_rbexpr.str_replace_many(
|
data/lib/polars/string_name_space.rb
CHANGED
@@ -120,7 +120,7 @@ module Polars
|
|
120
120
|
# Parse a Series of dtype Utf8 to a Date/Datetime Series.
|
121
121
|
#
|
122
122
|
# @param datatype [Symbol]
|
123
|
-
# `:date`, `:
|
123
|
+
# `:date`, `:datetime`, or `:time`.
|
124
124
|
# @param fmt [String]
|
125
125
|
# Format to use, refer to the
|
126
126
|
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
data/lib/polars/version.rb
CHANGED