polars-df 0.17.0 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +4 -10
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +3 -1
- data/ext/polars/src/conversion/any_value.rs +1 -1
- data/ext/polars/src/conversion/mod.rs +18 -0
- data/ext/polars/src/dataframe/general.rs +5 -5
- data/ext/polars/src/expr/general.rs +4 -0
- data/ext/polars/src/functions/lazy.rs +15 -0
- data/ext/polars/src/interop/numo/mod.rs +1 -0
- data/ext/polars/src/interop/numo/numo_rs.rs +52 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +69 -48
- data/ext/polars/src/lazyframe/general.rs +21 -22
- data/ext/polars/src/lib.rs +7 -2
- data/lib/polars/data_frame.rb +304 -6
- data/lib/polars/expr.rb +31 -0
- data/lib/polars/functions/eager.rb +145 -16
- data/lib/polars/io/database.rb +17 -0
- data/lib/polars/lazy_frame.rb +60 -7
- data/lib/polars/schema.rb +29 -0
- data/lib/polars/series.rb +25 -23
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +4 -2
data/lib/polars/lazy_frame.rb
CHANGED
@@ -619,6 +619,10 @@ module Polars
|
|
619
619
|
# Slice pushdown optimization.
|
620
620
|
# @param no_optimization [Boolean]
|
621
621
|
# Turn off (certain) optimizations.
|
622
|
+
# @param storage_options [Object]
|
623
|
+
# Options that indicate how to connect to a cloud provider.
|
624
|
+
# @param retries [Integer]
|
625
|
+
# Number of retries if accessing a cloud instance fails.
|
622
626
|
#
|
623
627
|
# @return [DataFrame]
|
624
628
|
#
|
@@ -646,7 +650,9 @@ module Polars
|
|
646
650
|
projection_pushdown: true,
|
647
651
|
simplify_expression: true,
|
648
652
|
slice_pushdown: true,
|
649
|
-
no_optimization: false
|
653
|
+
no_optimization: false,
|
654
|
+
storage_options: nil,
|
655
|
+
retries: 2
|
650
656
|
)
|
651
657
|
Utils._check_arg_is_1byte("separator", separator, false)
|
652
658
|
Utils._check_arg_is_1byte("quote_char", quote_char, false)
|
@@ -660,6 +666,12 @@ module Polars
|
|
660
666
|
no_optimization: no_optimization
|
661
667
|
)
|
662
668
|
|
669
|
+
if storage_options&.any?
|
670
|
+
storage_options = storage_options.to_a
|
671
|
+
else
|
672
|
+
storage_options = nil
|
673
|
+
end
|
674
|
+
|
663
675
|
lf.sink_csv(
|
664
676
|
path,
|
665
677
|
include_bom,
|
@@ -675,7 +687,9 @@ module Polars
|
|
675
687
|
float_precision,
|
676
688
|
null_value,
|
677
689
|
quote_style,
|
678
|
-
maintain_order
|
690
|
+
maintain_order,
|
691
|
+
storage_options,
|
692
|
+
retries
|
679
693
|
)
|
680
694
|
end
|
681
695
|
|
@@ -1923,6 +1937,24 @@ module Polars
|
|
1923
1937
|
# - true: -> Always coalesce join columns.
|
1924
1938
|
# - false: -> Never coalesce join columns.
|
1925
1939
|
# Note that joining on any other expressions than `col` will turn off coalescing.
|
1940
|
+
# @param maintain_order ['none', 'left', 'right', 'left_right', 'right_left']
|
1941
|
+
# Which DataFrame row order to preserve, if any.
|
1942
|
+
# Do not rely on any observed ordering without explicitly
|
1943
|
+
# setting this parameter, as your code may break in a future release.
|
1944
|
+
# Not specifying any ordering can improve performance
|
1945
|
+
# Supported for inner, left, right and full joins
|
1946
|
+
#
|
1947
|
+
# * *none*
|
1948
|
+
# No specific ordering is desired. The ordering might differ across
|
1949
|
+
# Polars versions or even between different runs.
|
1950
|
+
# * *left*
|
1951
|
+
# Preserves the order of the left DataFrame.
|
1952
|
+
# * *right*
|
1953
|
+
# Preserves the order of the right DataFrame.
|
1954
|
+
# * *left_right*
|
1955
|
+
# First preserves the order of the left DataFrame, then the right.
|
1956
|
+
# * *right_left*
|
1957
|
+
# First preserves the order of the right DataFrame, then the left.
|
1926
1958
|
#
|
1927
1959
|
# @return [LazyFrame]
|
1928
1960
|
#
|
@@ -2016,18 +2048,33 @@ module Polars
|
|
2016
2048
|
join_nulls: false,
|
2017
2049
|
allow_parallel: true,
|
2018
2050
|
force_parallel: false,
|
2019
|
-
coalesce: nil
|
2051
|
+
coalesce: nil,
|
2052
|
+
maintain_order: nil
|
2020
2053
|
)
|
2021
2054
|
if !other.is_a?(LazyFrame)
|
2022
2055
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
2023
2056
|
end
|
2024
2057
|
|
2058
|
+
if maintain_order.nil?
|
2059
|
+
maintain_order = "none"
|
2060
|
+
end
|
2061
|
+
|
2025
2062
|
if how == "outer"
|
2026
2063
|
how = "full"
|
2027
2064
|
elsif how == "cross"
|
2028
2065
|
return _from_rbldf(
|
2029
2066
|
_ldf.join(
|
2030
|
-
other._ldf,
|
2067
|
+
other._ldf,
|
2068
|
+
[],
|
2069
|
+
[],
|
2070
|
+
allow_parallel,
|
2071
|
+
join_nulls,
|
2072
|
+
force_parallel,
|
2073
|
+
how,
|
2074
|
+
suffix,
|
2075
|
+
validate,
|
2076
|
+
maintain_order,
|
2077
|
+
coalesce
|
2031
2078
|
)
|
2032
2079
|
)
|
2033
2080
|
end
|
@@ -2054,6 +2101,7 @@ module Polars
|
|
2054
2101
|
how,
|
2055
2102
|
suffix,
|
2056
2103
|
validate,
|
2104
|
+
maintain_order,
|
2057
2105
|
coalesce
|
2058
2106
|
)
|
2059
2107
|
)
|
@@ -3347,12 +3395,17 @@ module Polars
|
|
3347
3395
|
_from_rbldf(_ldf.merge_sorted(other._ldf, key))
|
3348
3396
|
end
|
3349
3397
|
|
3350
|
-
#
|
3398
|
+
# Flag a column as sorted.
|
3399
|
+
#
|
3400
|
+
# This can speed up future operations.
|
3401
|
+
#
|
3402
|
+
# @note
|
3403
|
+
# This can lead to incorrect results if the data is NOT sorted! Use with care!
|
3351
3404
|
#
|
3352
3405
|
# @param column [Object]
|
3353
|
-
#
|
3406
|
+
# Column that is sorted.
|
3354
3407
|
# @param descending [Boolean]
|
3355
|
-
# Whether the
|
3408
|
+
# Whether the column is sorted in descending order.
|
3356
3409
|
#
|
3357
3410
|
# @return [LazyFrame]
|
3358
3411
|
def set_sorted(
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Polars
|
2
|
+
class Schema
|
3
|
+
def initialize(schema, check_dtypes: true)
|
4
|
+
raise Todo if check_dtypes
|
5
|
+
@schema = schema.to_h
|
6
|
+
end
|
7
|
+
|
8
|
+
def [](key)
|
9
|
+
@schema[key]
|
10
|
+
end
|
11
|
+
|
12
|
+
def names
|
13
|
+
@schema.keys
|
14
|
+
end
|
15
|
+
|
16
|
+
def dtypes
|
17
|
+
@schema.values
|
18
|
+
end
|
19
|
+
|
20
|
+
def length
|
21
|
+
@schema.length
|
22
|
+
end
|
23
|
+
|
24
|
+
def to_s
|
25
|
+
"#{self.class.name}(#{@schema})"
|
26
|
+
end
|
27
|
+
alias_method :inspect, :to_s
|
28
|
+
end
|
29
|
+
end
|
data/lib/polars/series.rb
CHANGED
@@ -2554,29 +2554,7 @@ module Polars
|
|
2554
2554
|
# # Numo::Int64#shape=[3]
|
2555
2555
|
# # [1, 2, 3]
|
2556
2556
|
def to_numo
|
2557
|
-
if
|
2558
|
-
if is_datelike
|
2559
|
-
Numo::RObject.cast(to_a)
|
2560
|
-
elsif is_numeric
|
2561
|
-
# TODO make more efficient
|
2562
|
-
{
|
2563
|
-
UInt8 => Numo::UInt8,
|
2564
|
-
UInt16 => Numo::UInt16,
|
2565
|
-
UInt32 => Numo::UInt32,
|
2566
|
-
UInt64 => Numo::UInt64,
|
2567
|
-
Int8 => Numo::Int8,
|
2568
|
-
Int16 => Numo::Int16,
|
2569
|
-
Int32 => Numo::Int32,
|
2570
|
-
Int64 => Numo::Int64,
|
2571
|
-
Float32 => Numo::SFloat,
|
2572
|
-
Float64 => Numo::DFloat
|
2573
|
-
}.fetch(dtype.class).cast(to_a)
|
2574
|
-
elsif is_boolean
|
2575
|
-
Numo::Bit.cast(to_a)
|
2576
|
-
else
|
2577
|
-
_s.to_numo
|
2578
|
-
end
|
2579
|
-
elsif is_datelike
|
2557
|
+
if is_datelike
|
2580
2558
|
Numo::RObject.cast(to_a)
|
2581
2559
|
else
|
2582
2560
|
_s.to_numo
|
@@ -3815,6 +3793,30 @@ module Polars
|
|
3815
3793
|
super
|
3816
3794
|
end
|
3817
3795
|
|
3796
|
+
# Fill null values using interpolation based on another column.
|
3797
|
+
#
|
3798
|
+
# @param by [Expr]
|
3799
|
+
# Column to interpolate values based on.
|
3800
|
+
#
|
3801
|
+
# @return [Series]
|
3802
|
+
#
|
3803
|
+
# @example Fill null values using linear interpolation.
|
3804
|
+
# s = Polars::Series.new("a", [1, nil, nil, 3])
|
3805
|
+
# by = Polars::Series.new("b", [1, 2, 7, 8])
|
3806
|
+
# s.interpolate_by(by)
|
3807
|
+
# # =>
|
3808
|
+
# # shape: (4,)
|
3809
|
+
# # Series: 'a' [f64]
|
3810
|
+
# # [
|
3811
|
+
# # 1.0
|
3812
|
+
# # 1.285714
|
3813
|
+
# # 2.714286
|
3814
|
+
# # 3.0
|
3815
|
+
# # ]
|
3816
|
+
def interpolate_by(by)
|
3817
|
+
super
|
3818
|
+
end
|
3819
|
+
|
3818
3820
|
# Compute absolute values.
|
3819
3821
|
#
|
3820
3822
|
# @return [Series]
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -61,6 +61,7 @@ require_relative "polars/list_name_space"
|
|
61
61
|
require_relative "polars/meta_expr"
|
62
62
|
require_relative "polars/name_expr"
|
63
63
|
require_relative "polars/rolling_group_by"
|
64
|
+
require_relative "polars/schema"
|
64
65
|
require_relative "polars/selectors"
|
65
66
|
require_relative "polars/series"
|
66
67
|
require_relative "polars/slice"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.0
|
4
|
+
version: 0.17.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-04-13 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: bigdecimal
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- ext/polars/src/interop/arrow/to_ruby.rs
|
94
94
|
- ext/polars/src/interop/mod.rs
|
95
95
|
- ext/polars/src/interop/numo/mod.rs
|
96
|
+
- ext/polars/src/interop/numo/numo_rs.rs
|
96
97
|
- ext/polars/src/interop/numo/to_numo_df.rs
|
97
98
|
- ext/polars/src/interop/numo/to_numo_series.rs
|
98
99
|
- ext/polars/src/lazyframe/general.rs
|
@@ -171,6 +172,7 @@ files:
|
|
171
172
|
- lib/polars/name_expr.rb
|
172
173
|
- lib/polars/plot.rb
|
173
174
|
- lib/polars/rolling_group_by.rb
|
175
|
+
- lib/polars/schema.rb
|
174
176
|
- lib/polars/selectors.rb
|
175
177
|
- lib/polars/series.rb
|
176
178
|
- lib/polars/slice.rb
|