polars-df 0.13.0-x64-mingw-ucrt → 0.15.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/Cargo.lock +1368 -319
- data/LICENSE-THIRD-PARTY.txt +24818 -14217
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +285 -62
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +109 -8
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -12
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +470 -40
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +4 -3
- data/lib/polars/functions.rb +0 -57
data/lib/polars/string_cache.rb
CHANGED
@@ -1,11 +1,37 @@
|
|
1
1
|
module Polars
|
2
|
-
#
|
2
|
+
# Class for enabling and disabling the global string cache.
|
3
|
+
#
|
4
|
+
# @example Construct two Series using the same global string cache.
|
5
|
+
# s1 = nil
|
6
|
+
# s2 = nil
|
7
|
+
# Polars::StringCache.new do
|
8
|
+
# s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
|
9
|
+
# s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
|
10
|
+
# end
|
11
|
+
#
|
12
|
+
# @example As both Series are constructed under the same global string cache, they can be concatenated.
|
13
|
+
# Polars.concat([s1, s2])
|
14
|
+
# # =>
|
15
|
+
# # shape: (6,)
|
16
|
+
# # Series: 'color' [cat]
|
17
|
+
# # [
|
18
|
+
# # "red"
|
19
|
+
# # "green"
|
20
|
+
# # "red"
|
21
|
+
# # "blue"
|
22
|
+
# # "red"
|
23
|
+
# # "green"
|
24
|
+
# # ]
|
3
25
|
class StringCache
|
4
26
|
def initialize(&block)
|
5
27
|
RbStringCacheHolder.hold(&block)
|
6
28
|
end
|
7
29
|
end
|
8
30
|
|
31
|
+
def self.string_cache(...)
|
32
|
+
StringCache.new(...)
|
33
|
+
end
|
34
|
+
|
9
35
|
module Functions
|
10
36
|
# Enable the global string cache.
|
11
37
|
#
|
data/lib/polars/string_expr.rb
CHANGED
data/lib/polars/string_name_space.rb
CHANGED
@@ -658,7 +658,18 @@ module Polars
|
|
658
658
|
# An optional single character that should be trimmed
|
659
659
|
#
|
660
660
|
# @return [Series]
|
661
|
-
|
661
|
+
#
|
662
|
+
# @example
|
663
|
+
# s = Polars::Series.new([" hello ", "\tworld"])
|
664
|
+
# s.str.strip_chars
|
665
|
+
# # =>
|
666
|
+
# # shape: (2,)
|
667
|
+
# # Series: '' [str]
|
668
|
+
# # [
|
669
|
+
# # "hello"
|
670
|
+
# # "world"
|
671
|
+
# # ]
|
672
|
+
def strip_chars(matches = nil)
|
662
673
|
super
|
663
674
|
end
|
664
675
|
|
@@ -668,9 +679,21 @@ module Polars
|
|
668
679
|
# An optional single character that should be trimmed
|
669
680
|
#
|
670
681
|
# @return [Series]
|
671
|
-
|
682
|
+
#
|
683
|
+
# @example
|
684
|
+
# s = Polars::Series.new([" hello ", "\tworld"])
|
685
|
+
# s.str.strip_chars_start
|
686
|
+
# # =>
|
687
|
+
# # shape: (2,)
|
688
|
+
# # Series: '' [str]
|
689
|
+
# # [
|
690
|
+
# # "hello "
|
691
|
+
# # "world"
|
692
|
+
# # ]
|
693
|
+
def strip_chars_start(matches = nil)
|
672
694
|
super
|
673
695
|
end
|
696
|
+
alias_method :lstrip, :strip_chars_start
|
674
697
|
|
675
698
|
# Remove trailing whitespace.
|
676
699
|
#
|
@@ -678,9 +701,21 @@ module Polars
|
|
678
701
|
# An optional single character that should be trimmed
|
679
702
|
#
|
680
703
|
# @return [Series]
|
681
|
-
|
704
|
+
#
|
705
|
+
# @example
|
706
|
+
# s = Polars::Series.new([" hello ", "world\t"])
|
707
|
+
# s.str.strip_chars_end
|
708
|
+
# # =>
|
709
|
+
# # shape: (2,)
|
710
|
+
# # Series: '' [str]
|
711
|
+
# # [
|
712
|
+
# # " hello"
|
713
|
+
# # "world"
|
714
|
+
# # ]
|
715
|
+
def strip_chars_end(matches = nil)
|
682
716
|
super
|
683
717
|
end
|
718
|
+
alias_method :rstrip, :strip_chars_end
|
684
719
|
|
685
720
|
# Fills the string with zeroes.
|
686
721
|
#
|
@@ -695,6 +730,19 @@ module Polars
|
|
695
730
|
# Fill the value up to this length.
|
696
731
|
#
|
697
732
|
# @return [Series]
|
733
|
+
#
|
734
|
+
# @example
|
735
|
+
# s = Polars::Series.new([-1, 123, 999999, nil])
|
736
|
+
# s.cast(Polars::String).str.zfill(4)
|
737
|
+
# # =>
|
738
|
+
# # shape: (4,)
|
739
|
+
# # Series: '' [str]
|
740
|
+
# # [
|
741
|
+
# # "-001"
|
742
|
+
# # "0123"
|
743
|
+
# # "999999"
|
744
|
+
# # null
|
745
|
+
# # ]
|
698
746
|
def zfill(length)
|
699
747
|
super
|
700
748
|
end
|
@@ -758,6 +806,17 @@ module Polars
|
|
758
806
|
# Modify the strings to their lowercase equivalent.
|
759
807
|
#
|
760
808
|
# @return [Series]
|
809
|
+
#
|
810
|
+
# @example
|
811
|
+
# s = Polars::Series.new("foo", ["CAT", "DOG"])
|
812
|
+
# s.str.to_lowercase
|
813
|
+
# # =>
|
814
|
+
# # shape: (2,)
|
815
|
+
# # Series: 'foo' [str]
|
816
|
+
# # [
|
817
|
+
# # "cat"
|
818
|
+
# # "dog"
|
819
|
+
# # ]
|
761
820
|
def to_lowercase
|
762
821
|
super
|
763
822
|
end
|
@@ -765,6 +824,17 @@ module Polars
|
|
765
824
|
# Modify the strings to their uppercase equivalent.
|
766
825
|
#
|
767
826
|
# @return [Series]
|
827
|
+
#
|
828
|
+
# @example
|
829
|
+
# s = Polars::Series.new("foo", ["cat", "dog"])
|
830
|
+
# s.str.to_uppercase
|
831
|
+
# # =>
|
832
|
+
# # shape: (2,)
|
833
|
+
# # Series: 'foo' [str]
|
834
|
+
# # [
|
835
|
+
# # "CAT"
|
836
|
+
# # "DOG"
|
837
|
+
# # ]
|
768
838
|
def to_uppercase
|
769
839
|
super
|
770
840
|
end
|
data/lib/polars/struct_name_space.rb
CHANGED
@@ -23,16 +23,14 @@ module Polars
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
# Convert this Struct Series to a DataFrame.
|
27
|
-
#
|
28
|
-
# @return [DataFrame]
|
29
|
-
def to_frame
|
30
|
-
Utils.wrap_df(_s.struct_to_frame)
|
31
|
-
end
|
32
|
-
|
33
26
|
# Get the names of the fields.
|
34
27
|
#
|
35
28
|
# @return [Array]
|
29
|
+
#
|
30
|
+
# @example
|
31
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
32
|
+
# s.struct.fields
|
33
|
+
# # => ["a", "b"]
|
36
34
|
def fields
|
37
35
|
if _s.nil?
|
38
36
|
[]
|
@@ -47,6 +45,17 @@ module Polars
|
|
47
45
|
# Name of the field
|
48
46
|
#
|
49
47
|
# @return [Series]
|
48
|
+
#
|
49
|
+
# @example
|
50
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
51
|
+
# s.struct.field("a")
|
52
|
+
# # =>
|
53
|
+
# # shape: (2,)
|
54
|
+
# # Series: 'a' [i64]
|
55
|
+
# # [
|
56
|
+
# # 1
|
57
|
+
# # 3
|
58
|
+
# # ]
|
50
59
|
def field(name)
|
51
60
|
super
|
52
61
|
end
|
@@ -57,6 +66,16 @@ module Polars
|
|
57
66
|
# New names in the order of the struct's fields
|
58
67
|
#
|
59
68
|
# @return [Series]
|
69
|
+
#
|
70
|
+
# @example
|
71
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
72
|
+
# s.struct.fields
|
73
|
+
# # => ["a", "b"]
|
74
|
+
#
|
75
|
+
# @example
|
76
|
+
# s = s.struct.rename_fields(["c", "d"])
|
77
|
+
# s.struct.fields
|
78
|
+
# # => ["c", "d"]
|
60
79
|
def rename_fields(names)
|
61
80
|
super
|
62
81
|
end
|
@@ -64,6 +83,11 @@ module Polars
|
|
64
83
|
# Get the struct definition as a name/dtype schema dict.
|
65
84
|
#
|
66
85
|
# @return [Object]
|
86
|
+
#
|
87
|
+
# @example
|
88
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
89
|
+
# s.struct.schema
|
90
|
+
# # => {"a"=>Polars::Int64, "b"=>Polars::Int64}
|
67
91
|
def schema
|
68
92
|
if _s.nil?
|
69
93
|
{}
|
data/lib/polars/utils/various.rb
CHANGED
@@ -12,6 +12,10 @@ module Polars
|
|
12
12
|
val.all? { |x| x.is_a?(eltype) }
|
13
13
|
end
|
14
14
|
|
15
|
+
def self.is_path_or_str_sequence(val)
|
16
|
+
val.is_a?(::Array) && val.all? { |x| pathlike?(x) }
|
17
|
+
end
|
18
|
+
|
15
19
|
def self.is_bool_sequence(val)
|
16
20
|
val.is_a?(::Array) && val.all? { |x| x == true || x == false }
|
17
21
|
end
|
@@ -42,7 +46,7 @@ module Polars
|
|
42
46
|
end
|
43
47
|
|
44
48
|
def self.normalize_filepath(path, check_not_directory: true)
|
45
|
-
path = File.expand_path(path)
|
49
|
+
path = File.expand_path(path) if !path.is_a?(::String) || path.start_with?("~")
|
46
50
|
if check_not_directory && File.exist?(path) && Dir.exist?(path)
|
47
51
|
raise ArgumentError, "Expected a file path; #{path} is a directory"
|
48
52
|
end
|
data/lib/polars/utils.rb
CHANGED
@@ -3,12 +3,18 @@ module Polars
|
|
3
3
|
module Utils
|
4
4
|
DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
def self.is_polars_dtype(dtype, include_unknown: false)
|
7
|
+
is_dtype = dtype.is_a?(Symbol) || dtype.is_a?(::String) || dtype.is_a?(DataType) || (dtype.is_a?(Class) && dtype < DataType)
|
8
|
+
|
9
|
+
if !include_unknown
|
10
|
+
is_dtype && dtype != Unknown
|
11
|
+
else
|
12
|
+
is_dtype
|
10
13
|
end
|
11
|
-
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.is_column(obj)
|
17
|
+
obj.is_a?(Expr) && obj.meta.is_column
|
12
18
|
end
|
13
19
|
|
14
20
|
def self.map_rb_type_to_dtype(ruby_dtype)
|
@@ -100,6 +106,27 @@ module Polars
|
|
100
106
|
end
|
101
107
|
end
|
102
108
|
|
109
|
+
def self.is_selector(obj)
|
110
|
+
obj.is_a?(Selectors::SelectorProxy)
|
111
|
+
end
|
112
|
+
|
113
|
+
def self.expand_selector(target, selector, strict: true)
|
114
|
+
if target.is_a?(Hash)
|
115
|
+
target = DataFrame.new(schema: target)
|
116
|
+
end
|
117
|
+
|
118
|
+
if !is_selector(selector) && !is_polars_dtype(selector)
|
119
|
+
msg = "expected a selector; found #{selector.inspect} instead."
|
120
|
+
raise TypeError, msg
|
121
|
+
end
|
122
|
+
|
123
|
+
if is_selector(selector)
|
124
|
+
target.select(selector).columns
|
125
|
+
else
|
126
|
+
target.select(Polars.col(selector)).columns
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
103
130
|
def self._expand_selectors(frame, *items)
|
104
131
|
items_iter = _parse_inputs_as_iterable(items)
|
105
132
|
|
@@ -115,16 +142,24 @@ module Polars
|
|
115
142
|
expanded
|
116
143
|
end
|
117
144
|
|
118
|
-
# TODO
|
119
|
-
def self.is_selector(obj)
|
120
|
-
false
|
121
|
-
end
|
122
|
-
|
123
145
|
def self.parse_interval_argument(interval)
|
124
146
|
if interval.include?(" ")
|
125
147
|
interval = interval.gsub(" ", "")
|
126
148
|
end
|
127
149
|
interval.downcase
|
128
150
|
end
|
151
|
+
|
152
|
+
def self.parse_into_dtype(input)
|
153
|
+
if is_polars_dtype(input)
|
154
|
+
input
|
155
|
+
else
|
156
|
+
raise Todo
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
def self.re_escape(s)
|
161
|
+
# escapes _only_ those metachars with meaning to the rust regex crate
|
162
|
+
Plr.re_escape(s)
|
163
|
+
end
|
129
164
|
end
|
130
165
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -24,12 +24,12 @@ require_relative "polars/convert"
|
|
24
24
|
require_relative "polars/plot"
|
25
25
|
require_relative "polars/data_frame"
|
26
26
|
require_relative "polars/data_types"
|
27
|
+
require_relative "polars/data_type_group"
|
27
28
|
require_relative "polars/date_time_expr"
|
28
29
|
require_relative "polars/date_time_name_space"
|
29
30
|
require_relative "polars/dynamic_group_by"
|
30
31
|
require_relative "polars/exceptions"
|
31
32
|
require_relative "polars/expr"
|
32
|
-
require_relative "polars/functions"
|
33
33
|
require_relative "polars/functions/as_datatype"
|
34
34
|
require_relative "polars/functions/col"
|
35
35
|
require_relative "polars/functions/eager"
|
@@ -60,6 +60,7 @@ require_relative "polars/list_name_space"
|
|
60
60
|
require_relative "polars/meta_expr"
|
61
61
|
require_relative "polars/name_expr"
|
62
62
|
require_relative "polars/rolling_group_by"
|
63
|
+
require_relative "polars/selectors"
|
63
64
|
require_relative "polars/series"
|
64
65
|
require_relative "polars/slice"
|
65
66
|
require_relative "polars/sql_context"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.0
|
4
|
+
version: 0.15.0
|
5
5
|
platform: x64-mingw-ucrt
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -53,6 +53,7 @@ files:
|
|
53
53
|
- lib/polars/config.rb
|
54
54
|
- lib/polars/convert.rb
|
55
55
|
- lib/polars/data_frame.rb
|
56
|
+
- lib/polars/data_type_group.rb
|
56
57
|
- lib/polars/data_types.rb
|
57
58
|
- lib/polars/date_time_expr.rb
|
58
59
|
- lib/polars/date_time_name_space.rb
|
@@ -60,7 +61,6 @@ files:
|
|
60
61
|
- lib/polars/exceptions.rb
|
61
62
|
- lib/polars/expr.rb
|
62
63
|
- lib/polars/expr_dispatch.rb
|
63
|
-
- lib/polars/functions.rb
|
64
64
|
- lib/polars/functions/aggregation/horizontal.rb
|
65
65
|
- lib/polars/functions/aggregation/vertical.rb
|
66
66
|
- lib/polars/functions/as_datatype.rb
|
@@ -92,6 +92,7 @@ files:
|
|
92
92
|
- lib/polars/name_expr.rb
|
93
93
|
- lib/polars/plot.rb
|
94
94
|
- lib/polars/rolling_group_by.rb
|
95
|
+
- lib/polars/selectors.rb
|
95
96
|
- lib/polars/series.rb
|
96
97
|
- lib/polars/slice.rb
|
97
98
|
- lib/polars/sql_context.rb
|
data/lib/polars/functions.rb
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
module Polars
|
2
|
-
module Functions
|
3
|
-
# Convert categorical variables into dummy/indicator variables.
|
4
|
-
#
|
5
|
-
# @param df [DataFrame]
|
6
|
-
# DataFrame to convert.
|
7
|
-
# @param columns [Array, nil]
|
8
|
-
# A subset of columns to convert to dummy variables. `nil` means
|
9
|
-
# "all columns".
|
10
|
-
#
|
11
|
-
# @return [DataFrame]
|
12
|
-
def get_dummies(df, columns: nil)
|
13
|
-
df.to_dummies(columns: columns)
|
14
|
-
end
|
15
|
-
|
16
|
-
# Aggregate to list.
|
17
|
-
#
|
18
|
-
# @return [Expr]
|
19
|
-
def to_list(name)
|
20
|
-
col(name).list
|
21
|
-
end
|
22
|
-
|
23
|
-
# Compute the spearman rank correlation between two columns.
|
24
|
-
#
|
25
|
-
# Missing data will be excluded from the computation.
|
26
|
-
#
|
27
|
-
# @param a [Object]
|
28
|
-
# Column name or Expression.
|
29
|
-
# @param b [Object]
|
30
|
-
# Column name or Expression.
|
31
|
-
# @param ddof [Integer]
|
32
|
-
# Delta degrees of freedom
|
33
|
-
# @param propagate_nans [Boolean]
|
34
|
-
# If `True` any `NaN` encountered will lead to `NaN` in the output.
|
35
|
-
# Defaults to `False` where `NaN` are regarded as larger than any finite number
|
36
|
-
# and thus lead to the highest rank.
|
37
|
-
#
|
38
|
-
# @return [Expr]
|
39
|
-
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
|
40
|
-
corr(a, b, method: "spearman", ddof: ddof, propagate_nans: propagate_nans)
|
41
|
-
end
|
42
|
-
|
43
|
-
# Compute the pearson's correlation between two columns.
|
44
|
-
#
|
45
|
-
# @param a [Object]
|
46
|
-
# Column name or Expression.
|
47
|
-
# @param b [Object]
|
48
|
-
# Column name or Expression.
|
49
|
-
# @param ddof [Integer]
|
50
|
-
# Delta degrees of freedom
|
51
|
-
#
|
52
|
-
# @return [Expr]
|
53
|
-
def pearson_corr(a, b, ddof: 1)
|
54
|
-
corr(a, b, method: "pearson", ddof: ddof)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|