polars-df 0.17.1 → 0.19.0
This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +725 -453
- data/ext/polars/Cargo.toml +8 -8
- data/ext/polars/src/conversion/any_value.rs +1 -1
- data/ext/polars/src/conversion/mod.rs +38 -7
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -1
- data/ext/polars/src/expr/array.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +16 -9
- data/ext/polars/src/expr/general.rs +12 -14
- data/ext/polars/src/expr/list.rs +3 -3
- data/ext/polars/src/expr/rolling.rs +17 -2
- data/ext/polars/src/expr/string.rs +2 -2
- data/ext/polars/src/file.rs +56 -14
- data/ext/polars/src/functions/lazy.rs +26 -4
- data/ext/polars/src/functions/range.rs +4 -4
- data/ext/polars/src/lazyframe/general.rs +87 -48
- data/ext/polars/src/lazyframe/mod.rs +2 -0
- data/ext/polars/src/lazyframe/sink.rs +99 -0
- data/ext/polars/src/lib.rs +7 -9
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/map/series.rs +4 -4
- data/ext/polars/src/on_startup.rs +15 -3
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/series/general.rs +2 -2
- data/lib/polars/array_expr.rb +4 -2
- data/lib/polars/expr.rb +28 -28
- data/lib/polars/functions/lit.rb +4 -9
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/lazy_frame.rb +78 -14
- data/lib/polars/list_expr.rb +10 -11
- data/lib/polars/series.rb +29 -12
- data/lib/polars/string_expr.rb +3 -3
- data/lib/polars/version.rb +1 -1
- metadata +4 -3
data/lib/polars/functions/lit.rb
CHANGED
@@ -16,20 +16,15 @@ module Polars
|
|
16
16
|
elsif value.is_a?(::Date)
|
17
17
|
return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
|
18
18
|
elsif value.is_a?(Polars::Series)
|
19
|
-
name = value.name
|
20
19
|
value = value._s
|
21
|
-
|
22
|
-
if name == ""
|
23
|
-
return e
|
24
|
-
end
|
25
|
-
return e.alias(name)
|
20
|
+
return Utils.wrap_expr(Plr.lit(value, allow_object, false))
|
26
21
|
elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
|
27
|
-
return lit(Series.new("", value))
|
22
|
+
return Utils.wrap_expr(Plr.lit(Series.new("literal", [value.to_a], dtype: dtype)._s, allow_object, true))
|
28
23
|
elsif dtype
|
29
|
-
return Utils.wrap_expr(Plr.lit(value, allow_object)).cast(dtype)
|
24
|
+
return Utils.wrap_expr(Plr.lit(value, allow_object, true)).cast(dtype)
|
30
25
|
end
|
31
26
|
|
32
|
-
Utils.wrap_expr(Plr.lit(value, allow_object))
|
27
|
+
Utils.wrap_expr(Plr.lit(value, allow_object, true))
|
33
28
|
end
|
34
29
|
end
|
35
30
|
end
|
data/lib/polars/io/database.rb
CHANGED
@@ -51,7 +51,7 @@ module Polars
|
|
51
51
|
when :decimal
|
52
52
|
Decimal
|
53
53
|
when :float
|
54
|
-
# TODO uncomment in
|
54
|
+
# TODO uncomment in future release
|
55
55
|
# if column_type.limit && column_type.limit <= 24
|
56
56
|
# Float32
|
57
57
|
# else
|
@@ -59,7 +59,7 @@ module Polars
|
|
59
59
|
# end
|
60
60
|
Float64
|
61
61
|
when :integer
|
62
|
-
# TODO uncomment in
|
62
|
+
# TODO uncomment in future release
|
63
63
|
# case column_type.limit
|
64
64
|
# when 1
|
65
65
|
# Int8
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -433,7 +433,10 @@ module Polars
|
|
433
433
|
no_optimization: false,
|
434
434
|
slice_pushdown: true,
|
435
435
|
storage_options: nil,
|
436
|
-
retries: 2
|
436
|
+
retries: 2,
|
437
|
+
sync_on_close: nil,
|
438
|
+
mkdir: false,
|
439
|
+
lazy: false
|
437
440
|
)
|
438
441
|
lf = _set_sink_optimizations(
|
439
442
|
type_coercion: type_coercion,
|
@@ -468,17 +471,30 @@ module Polars
|
|
468
471
|
storage_options = nil
|
469
472
|
end
|
470
473
|
|
471
|
-
|
474
|
+
sink_options = {
|
475
|
+
"sync_on_close" => sync_on_close || "none",
|
476
|
+
"maintain_order" => maintain_order,
|
477
|
+
"mkdir" => mkdir
|
478
|
+
}
|
479
|
+
|
480
|
+
lf = lf.sink_parquet(
|
472
481
|
path,
|
473
482
|
compression,
|
474
483
|
compression_level,
|
475
484
|
statistics,
|
476
485
|
row_group_size,
|
477
486
|
data_pagesize_limit,
|
478
|
-
maintain_order,
|
479
487
|
storage_options,
|
480
|
-
retries
|
488
|
+
retries,
|
489
|
+
sink_options
|
481
490
|
)
|
491
|
+
lf = LazyFrame._from_rbldf(lf)
|
492
|
+
|
493
|
+
if !lazy
|
494
|
+
lf.collect
|
495
|
+
return nil
|
496
|
+
end
|
497
|
+
lf
|
482
498
|
end
|
483
499
|
|
484
500
|
# Evaluate the query in streaming mode and write to an IPC file.
|
@@ -520,7 +536,10 @@ module Polars
|
|
520
536
|
projection_pushdown: true,
|
521
537
|
simplify_expression: true,
|
522
538
|
slice_pushdown: true,
|
523
|
-
no_optimization: false
|
539
|
+
no_optimization: false,
|
540
|
+
sync_on_close: nil,
|
541
|
+
mkdir: false,
|
542
|
+
lazy: false
|
524
543
|
)
|
525
544
|
# TODO support storage options in Rust
|
526
545
|
storage_options = nil
|
@@ -541,13 +560,26 @@ module Polars
|
|
541
560
|
storage_options = nil
|
542
561
|
end
|
543
562
|
|
544
|
-
|
563
|
+
sink_options = {
|
564
|
+
"sync_on_close" => sync_on_close || "none",
|
565
|
+
"maintain_order" => maintain_order,
|
566
|
+
"mkdir" => mkdir
|
567
|
+
}
|
568
|
+
|
569
|
+
lf = lf.sink_ipc(
|
545
570
|
path,
|
546
571
|
compression,
|
547
|
-
maintain_order,
|
548
572
|
storage_options,
|
549
|
-
retries
|
573
|
+
retries,
|
574
|
+
sink_options
|
550
575
|
)
|
576
|
+
lf = LazyFrame._from_rbldf(lf)
|
577
|
+
|
578
|
+
if !lazy
|
579
|
+
lf.collect
|
580
|
+
return nil
|
581
|
+
end
|
582
|
+
lf
|
551
583
|
end
|
552
584
|
|
553
585
|
# Evaluate the query in streaming mode and write to a CSV file.
|
@@ -652,7 +684,10 @@ module Polars
|
|
652
684
|
slice_pushdown: true,
|
653
685
|
no_optimization: false,
|
654
686
|
storage_options: nil,
|
655
|
-
retries: 2
|
687
|
+
retries: 2,
|
688
|
+
sync_on_close: nil,
|
689
|
+
mkdir: false,
|
690
|
+
lazy: false
|
656
691
|
)
|
657
692
|
Utils._check_arg_is_1byte("separator", separator, false)
|
658
693
|
Utils._check_arg_is_1byte("quote_char", quote_char, false)
|
@@ -672,7 +707,13 @@ module Polars
|
|
672
707
|
storage_options = nil
|
673
708
|
end
|
674
709
|
|
675
|
-
|
710
|
+
sink_options = {
|
711
|
+
"sync_on_close" => sync_on_close || "none",
|
712
|
+
"maintain_order" => maintain_order,
|
713
|
+
"mkdir" => mkdir
|
714
|
+
}
|
715
|
+
|
716
|
+
lf = lf.sink_csv(
|
676
717
|
path,
|
677
718
|
include_bom,
|
678
719
|
include_header,
|
@@ -687,10 +728,17 @@ module Polars
|
|
687
728
|
float_precision,
|
688
729
|
null_value,
|
689
730
|
quote_style,
|
690
|
-
maintain_order,
|
691
731
|
storage_options,
|
692
|
-
retries
|
732
|
+
retries,
|
733
|
+
sink_options
|
693
734
|
)
|
735
|
+
lf = LazyFrame._from_rbldf(lf)
|
736
|
+
|
737
|
+
if !lazy
|
738
|
+
lf.collect
|
739
|
+
return nil
|
740
|
+
end
|
741
|
+
lf
|
694
742
|
end
|
695
743
|
|
696
744
|
# Evaluate the query in streaming mode and write to an NDJSON file.
|
@@ -730,7 +778,10 @@ module Polars
|
|
730
778
|
slice_pushdown: true,
|
731
779
|
no_optimization: false,
|
732
780
|
storage_options: nil,
|
733
|
-
retries: 2
|
781
|
+
retries: 2,
|
782
|
+
sync_on_close: nil,
|
783
|
+
mkdir: false,
|
784
|
+
lazy: false
|
734
785
|
)
|
735
786
|
lf = _set_sink_optimizations(
|
736
787
|
type_coercion: type_coercion,
|
@@ -747,7 +798,20 @@ module Polars
|
|
747
798
|
storage_options = nil
|
748
799
|
end
|
749
800
|
|
750
|
-
|
801
|
+
sink_options = {
|
802
|
+
"sync_on_close" => sync_on_close || "none",
|
803
|
+
"maintain_order" => maintain_order,
|
804
|
+
"mkdir" => mkdir
|
805
|
+
}
|
806
|
+
|
807
|
+
lf = lf.sink_json(path, storage_options, retries, sink_options)
|
808
|
+
lf = LazyFrame._from_rbldf(lf)
|
809
|
+
|
810
|
+
if !lazy
|
811
|
+
lf.collect
|
812
|
+
return nil
|
813
|
+
end
|
814
|
+
lf
|
751
815
|
end
|
752
816
|
|
753
817
|
# @private
|
data/lib/polars/list_expr.rb
CHANGED
@@ -403,7 +403,7 @@ module Polars
|
|
403
403
|
# The indices may be defined in a single column, or by sublists in another
|
404
404
|
# column of dtype `List`.
|
405
405
|
#
|
406
|
-
# @param
|
406
|
+
# @param indices [Object]
|
407
407
|
# Indices to return per sublist
|
408
408
|
# @param null_on_oob [Boolean]
|
409
409
|
# Behavior if an index is out of bounds:
|
@@ -427,12 +427,9 @@ module Polars
|
|
427
427
|
# # │ [] ┆ [null, null] │
|
428
428
|
# # │ [1, 2, … 5] ┆ [1, 5] │
|
429
429
|
# # └─────────────┴──────────────┘
|
430
|
-
def gather(
|
431
|
-
|
432
|
-
|
433
|
-
end
|
434
|
-
index = Utils.parse_into_expression(index, str_as_lit: false)
|
435
|
-
Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
|
430
|
+
def gather(indices, null_on_oob: false)
|
431
|
+
indices = Utils.parse_into_expression(indices)
|
432
|
+
Utils.wrap_expr(_rbexpr.list_gather(indices, null_on_oob))
|
436
433
|
end
|
437
434
|
alias_method :take, :gather
|
438
435
|
|
@@ -484,6 +481,8 @@ module Polars
|
|
484
481
|
#
|
485
482
|
# @param item [Object]
|
486
483
|
# Item that will be checked for membership
|
484
|
+
# @param nulls_equal [Boolean]
|
485
|
+
# If true, treat null as a distinct value. Null values will not propagate.
|
487
486
|
#
|
488
487
|
# @return [Expr]
|
489
488
|
#
|
@@ -501,8 +500,8 @@ module Polars
|
|
501
500
|
# # │ false │
|
502
501
|
# # │ true │
|
503
502
|
# # └───────┘
|
504
|
-
def contains(item)
|
505
|
-
Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
|
503
|
+
def contains(item, nulls_equal: true)
|
504
|
+
Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item), nulls_equal))
|
506
505
|
end
|
507
506
|
|
508
507
|
# Join all string items in a sublist and place a separator between them.
|
@@ -749,9 +748,9 @@ module Polars
|
|
749
748
|
# # │ {1,2,3} │
|
750
749
|
# # │ {1,2,null} │
|
751
750
|
# # └────────────┘
|
752
|
-
def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
|
751
|
+
def to_struct(n_field_strategy: "first_non_null", name_generator: nil, upper_bound: nil)
|
753
752
|
raise Todo if name_generator
|
754
|
-
Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator,
|
753
|
+
Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, nil))
|
755
754
|
end
|
756
755
|
|
757
756
|
# Run any polars expression against the lists' elements.
|
data/lib/polars/series.rb
CHANGED
@@ -2144,18 +2144,33 @@ module Polars
|
|
2144
2144
|
|
2145
2145
|
# Check if elements of this Series are in the other Series.
|
2146
2146
|
#
|
2147
|
+
# @param nulls_equal [Boolean]
|
2148
|
+
# If true, treat null as a distinct value. Null values will not propagate.
|
2149
|
+
#
|
2147
2150
|
# @return [Series]
|
2148
2151
|
#
|
2149
2152
|
# @example
|
2150
2153
|
# s = Polars::Series.new("a", [1, 2, 3])
|
2151
|
-
# s2 = Polars::Series.new("b", [2, 4])
|
2154
|
+
# s2 = Polars::Series.new("b", [2, 4, nil])
|
2152
2155
|
# s2.is_in(s)
|
2153
2156
|
# # =>
|
2154
|
-
# # shape: (
|
2157
|
+
# # shape: (3,)
|
2158
|
+
# # Series: 'b' [bool]
|
2159
|
+
# # [
|
2160
|
+
# # true
|
2161
|
+
# # false
|
2162
|
+
# # null
|
2163
|
+
# # ]
|
2164
|
+
#
|
2165
|
+
# @example
|
2166
|
+
# s2.is_in(s, nulls_equal: true)
|
2167
|
+
# # =>
|
2168
|
+
# # shape: (3,)
|
2155
2169
|
# # Series: 'b' [bool]
|
2156
2170
|
# # [
|
2157
2171
|
# # true
|
2158
2172
|
# # false
|
2173
|
+
# # false
|
2159
2174
|
# # ]
|
2160
2175
|
#
|
2161
2176
|
# @example
|
@@ -2190,7 +2205,7 @@ module Polars
|
|
2190
2205
|
# # true
|
2191
2206
|
# # false
|
2192
2207
|
# # ]
|
2193
|
-
def is_in(other)
|
2208
|
+
def is_in(other, nulls_equal: false)
|
2194
2209
|
super
|
2195
2210
|
end
|
2196
2211
|
alias_method :in?, :is_in
|
@@ -3577,24 +3592,26 @@ module Polars
|
|
3577
3592
|
# Integer size of the rolling window.
|
3578
3593
|
# @param bias [Boolean]
|
3579
3594
|
# If false, the calculations are corrected for statistical bias.
|
3595
|
+
# @param min_samples [Integer]
|
3596
|
+
# The number of values in the window that should be non-null before computing
|
3597
|
+
# a result. If set to `nil` (default), it will be set equal to `window_size`.
|
3598
|
+
# @param center [Boolean]
|
3599
|
+
# Set the labels at the center of the window.
|
3580
3600
|
#
|
3581
3601
|
# @return [Series]
|
3582
3602
|
#
|
3583
3603
|
# @example
|
3584
|
-
#
|
3585
|
-
# s.rolling_skew(3)
|
3604
|
+
# Polars::Series.new([1, 4, 2, 9]).rolling_skew(3)
|
3586
3605
|
# # =>
|
3587
|
-
# # shape: (
|
3588
|
-
# # Series: '
|
3606
|
+
# # shape: (4,)
|
3607
|
+
# # Series: '' [f64]
|
3589
3608
|
# # [
|
3590
3609
|
# # null
|
3591
3610
|
# # null
|
3592
|
-
# # 0.0
|
3593
|
-
# # 0.0
|
3594
3611
|
# # 0.381802
|
3595
|
-
# # 0.
|
3612
|
+
# # 0.47033
|
3596
3613
|
# # ]
|
3597
|
-
def rolling_skew(window_size, bias: true)
|
3614
|
+
def rolling_skew(window_size, bias: true, min_samples: nil, center: false)
|
3598
3615
|
super
|
3599
3616
|
end
|
3600
3617
|
|
@@ -4043,7 +4060,7 @@ module Polars
|
|
4043
4060
|
#
|
4044
4061
|
# @example
|
4045
4062
|
# s.kurtosis(fisher: false, bias: false)
|
4046
|
-
# # => 2.
|
4063
|
+
# # => 2.1040361802642717
|
4047
4064
|
def kurtosis(fisher: true, bias: true)
|
4048
4065
|
_s.kurtosis(fisher, bias)
|
4049
4066
|
end
|
data/lib/polars/string_expr.rb
CHANGED
@@ -1403,7 +1403,7 @@ module Polars
|
|
1403
1403
|
# # │ Can you feel the love tonight ┆ true │
|
1404
1404
|
# # └─────────────────────────────────┴──────────────┘
|
1405
1405
|
def contains_any(patterns, ascii_case_insensitive: false)
|
1406
|
-
patterns = Utils.parse_into_expression(patterns, str_as_lit: false
|
1406
|
+
patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
|
1407
1407
|
Utils.wrap_expr(
|
1408
1408
|
_rbexpr.str_contains_any(patterns, ascii_case_insensitive)
|
1409
1409
|
)
|
@@ -1474,9 +1474,9 @@ module Polars
|
|
1474
1474
|
# # │ Can you feel the love tonight ┆ Can me feel the love tonight │
|
1475
1475
|
# # └─────────────────────────────────┴─────────────────────────────────┘
|
1476
1476
|
def replace_many(patterns, replace_with, ascii_case_insensitive: false)
|
1477
|
-
patterns = Utils.parse_into_expression(patterns, str_as_lit: false
|
1477
|
+
patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
|
1478
1478
|
replace_with = Utils.parse_into_expression(
|
1479
|
-
replace_with, str_as_lit: true
|
1479
|
+
replace_with, str_as_lit: true
|
1480
1480
|
)
|
1481
1481
|
Utils.wrap_expr(
|
1482
1482
|
_rbexpr.str_replace_many(
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.1
|
4
|
+
version: 0.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: bigdecimal
|
@@ -99,6 +99,7 @@ files:
|
|
99
99
|
- ext/polars/src/lazyframe/general.rs
|
100
100
|
- ext/polars/src/lazyframe/mod.rs
|
101
101
|
- ext/polars/src/lazyframe/serde.rs
|
102
|
+
- ext/polars/src/lazyframe/sink.rs
|
102
103
|
- ext/polars/src/lazygroupby.rs
|
103
104
|
- ext/polars/src/lib.rs
|
104
105
|
- ext/polars/src/map/dataframe.rs
|
@@ -209,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
209
210
|
- !ruby/object:Gem::Version
|
210
211
|
version: '0'
|
211
212
|
requirements: []
|
212
|
-
rubygems_version: 3.6.
|
213
|
+
rubygems_version: 3.6.7
|
213
214
|
specification_version: 4
|
214
215
|
summary: Blazingly fast DataFrames for Ruby
|
215
216
|
test_files: []
|