polars-df 0.14.0-arm64-darwin → 0.15.0-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Cargo.lock +1296 -283
- data/LICENSE-THIRD-PARTY.txt +25771 -13906
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +275 -52
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +4 -3
- data/lib/polars/functions.rb +0 -57
data/lib/polars/string_cache.rb
CHANGED
@@ -1,11 +1,37 @@
|
|
1
1
|
module Polars
|
2
|
-
#
|
2
|
+
# Class for enabling and disabling the global string cache.
|
3
|
+
#
|
4
|
+
# @example Construct two Series using the same global string cache.
|
5
|
+
# s1 = nil
|
6
|
+
# s2 = nil
|
7
|
+
# Polars::StringCache.new do
|
8
|
+
# s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
|
9
|
+
# s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
|
10
|
+
# end
|
11
|
+
#
|
12
|
+
# @example As both Series are constructed under the same global string cache, they can be concatenated.
|
13
|
+
# Polars.concat([s1, s2])
|
14
|
+
# # =>
|
15
|
+
# # shape: (6,)
|
16
|
+
# # Series: 'color' [cat]
|
17
|
+
# # [
|
18
|
+
# # "red"
|
19
|
+
# # "green"
|
20
|
+
# # "red"
|
21
|
+
# # "blue"
|
22
|
+
# # "red"
|
23
|
+
# # "green"
|
24
|
+
# # ]
|
3
25
|
class StringCache
|
4
26
|
def initialize(&block)
|
5
27
|
RbStringCacheHolder.hold(&block)
|
6
28
|
end
|
7
29
|
end
|
8
30
|
|
31
|
+
def self.string_cache(...)
|
32
|
+
StringCache.new(...)
|
33
|
+
end
|
34
|
+
|
9
35
|
module Functions
|
10
36
|
# Enable the global string cache.
|
11
37
|
#
|
data/lib/polars/string_expr.rb
CHANGED
data/lib/polars/string_name_space.rb
CHANGED
@@ -658,7 +658,18 @@ module Polars
|
|
658
658
|
# An optional single character that should be trimmed
|
659
659
|
#
|
660
660
|
# @return [Series]
|
661
|
-
|
661
|
+
#
|
662
|
+
# @example
|
663
|
+
# s = Polars::Series.new([" hello ", "\tworld"])
|
664
|
+
# s.str.strip_chars
|
665
|
+
# # =>
|
666
|
+
# # shape: (2,)
|
667
|
+
# # Series: '' [str]
|
668
|
+
# # [
|
669
|
+
# # "hello"
|
670
|
+
# # "world"
|
671
|
+
# # ]
|
672
|
+
def strip_chars(matches = nil)
|
662
673
|
super
|
663
674
|
end
|
664
675
|
|
@@ -668,9 +679,21 @@ module Polars
|
|
668
679
|
# An optional single character that should be trimmed
|
669
680
|
#
|
670
681
|
# @return [Series]
|
671
|
-
|
682
|
+
#
|
683
|
+
# @example
|
684
|
+
# s = Polars::Series.new([" hello ", "\tworld"])
|
685
|
+
# s.str.strip_chars_start
|
686
|
+
# # =>
|
687
|
+
# # shape: (2,)
|
688
|
+
# # Series: '' [str]
|
689
|
+
# # [
|
690
|
+
# # "hello "
|
691
|
+
# # "world"
|
692
|
+
# # ]
|
693
|
+
def strip_chars_start(matches = nil)
|
672
694
|
super
|
673
695
|
end
|
696
|
+
alias_method :lstrip, :strip_chars_start
|
674
697
|
|
675
698
|
# Remove trailing whitespace.
|
676
699
|
#
|
@@ -678,9 +701,21 @@ module Polars
|
|
678
701
|
# An optional single character that should be trimmed
|
679
702
|
#
|
680
703
|
# @return [Series]
|
681
|
-
|
704
|
+
#
|
705
|
+
# @example
|
706
|
+
# s = Polars::Series.new([" hello ", "world\t"])
|
707
|
+
# s.str.strip_chars_end
|
708
|
+
# # =>
|
709
|
+
# # shape: (2,)
|
710
|
+
# # Series: '' [str]
|
711
|
+
# # [
|
712
|
+
# # " hello"
|
713
|
+
# # "world"
|
714
|
+
# # ]
|
715
|
+
def strip_chars_end(matches = nil)
|
682
716
|
super
|
683
717
|
end
|
718
|
+
alias_method :rstrip, :strip_chars_end
|
684
719
|
|
685
720
|
# Fills the string with zeroes.
|
686
721
|
#
|
@@ -695,6 +730,19 @@ module Polars
|
|
695
730
|
# Fill the value up to this length.
|
696
731
|
#
|
697
732
|
# @return [Series]
|
733
|
+
#
|
734
|
+
# @example
|
735
|
+
# s = Polars::Series.new([-1, 123, 999999, nil])
|
736
|
+
# s.cast(Polars::String).str.zfill(4)
|
737
|
+
# # =>
|
738
|
+
# # shape: (4,)
|
739
|
+
# # Series: '' [str]
|
740
|
+
# # [
|
741
|
+
# # "-001"
|
742
|
+
# # "0123"
|
743
|
+
# # "999999"
|
744
|
+
# # null
|
745
|
+
# # ]
|
698
746
|
def zfill(length)
|
699
747
|
super
|
700
748
|
end
|
@@ -758,6 +806,17 @@ module Polars
|
|
758
806
|
# Modify the strings to their lowercase equivalent.
|
759
807
|
#
|
760
808
|
# @return [Series]
|
809
|
+
#
|
810
|
+
# @example
|
811
|
+
# s = Polars::Series.new("foo", ["CAT", "DOG"])
|
812
|
+
# s.str.to_lowercase
|
813
|
+
# # =>
|
814
|
+
# # shape: (2,)
|
815
|
+
# # Series: 'foo' [str]
|
816
|
+
# # [
|
817
|
+
# # "cat"
|
818
|
+
# # "dog"
|
819
|
+
# # ]
|
761
820
|
def to_lowercase
|
762
821
|
super
|
763
822
|
end
|
@@ -765,6 +824,17 @@ module Polars
|
|
765
824
|
# Modify the strings to their uppercase equivalent.
|
766
825
|
#
|
767
826
|
# @return [Series]
|
827
|
+
#
|
828
|
+
# @example
|
829
|
+
# s = Polars::Series.new("foo", ["cat", "dog"])
|
830
|
+
# s.str.to_uppercase
|
831
|
+
# # =>
|
832
|
+
# # shape: (2,)
|
833
|
+
# # Series: 'foo' [str]
|
834
|
+
# # [
|
835
|
+
# # "CAT"
|
836
|
+
# # "DOG"
|
837
|
+
# # ]
|
768
838
|
def to_uppercase
|
769
839
|
super
|
770
840
|
end
|
data/lib/polars/struct_name_space.rb
CHANGED
@@ -23,16 +23,14 @@ module Polars
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
# Convert this Struct Series to a DataFrame.
|
27
|
-
#
|
28
|
-
# @return [DataFrame]
|
29
|
-
def to_frame
|
30
|
-
Utils.wrap_df(_s.struct_to_frame)
|
31
|
-
end
|
32
|
-
|
33
26
|
# Get the names of the fields.
|
34
27
|
#
|
35
28
|
# @return [Array]
|
29
|
+
#
|
30
|
+
# @example
|
31
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
32
|
+
# s.struct.fields
|
33
|
+
# # => ["a", "b"]
|
36
34
|
def fields
|
37
35
|
if _s.nil?
|
38
36
|
[]
|
@@ -47,6 +45,17 @@ module Polars
|
|
47
45
|
# Name of the field
|
48
46
|
#
|
49
47
|
# @return [Series]
|
48
|
+
#
|
49
|
+
# @example
|
50
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
51
|
+
# s.struct.field("a")
|
52
|
+
# # =>
|
53
|
+
# # shape: (2,)
|
54
|
+
# # Series: 'a' [i64]
|
55
|
+
# # [
|
56
|
+
# # 1
|
57
|
+
# # 3
|
58
|
+
# # ]
|
50
59
|
def field(name)
|
51
60
|
super
|
52
61
|
end
|
@@ -57,6 +66,16 @@ module Polars
|
|
57
66
|
# New names in the order of the struct's fields
|
58
67
|
#
|
59
68
|
# @return [Series]
|
69
|
+
#
|
70
|
+
# @example
|
71
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
72
|
+
# s.struct.fields
|
73
|
+
# # => ["a", "b"]
|
74
|
+
#
|
75
|
+
# @example
|
76
|
+
# s = s.struct.rename_fields(["c", "d"])
|
77
|
+
# s.struct.fields
|
78
|
+
# # => ["c", "d"]
|
60
79
|
def rename_fields(names)
|
61
80
|
super
|
62
81
|
end
|
@@ -64,6 +83,11 @@ module Polars
|
|
64
83
|
# Get the struct definition as a name/dtype schema dict.
|
65
84
|
#
|
66
85
|
# @return [Object]
|
86
|
+
#
|
87
|
+
# @example
|
88
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
89
|
+
# s.struct.schema
|
90
|
+
# # => {"a"=>Polars::Int64, "b"=>Polars::Int64}
|
67
91
|
def schema
|
68
92
|
if _s.nil?
|
69
93
|
{}
|
data/lib/polars/utils/various.rb
CHANGED
@@ -12,6 +12,10 @@ module Polars
|
|
12
12
|
val.all? { |x| x.is_a?(eltype) }
|
13
13
|
end
|
14
14
|
|
15
|
+
def self.is_path_or_str_sequence(val)
|
16
|
+
val.is_a?(::Array) && val.all? { |x| pathlike?(x) }
|
17
|
+
end
|
18
|
+
|
15
19
|
def self.is_bool_sequence(val)
|
16
20
|
val.is_a?(::Array) && val.all? { |x| x == true || x == false }
|
17
21
|
end
|
@@ -42,7 +46,7 @@ module Polars
|
|
42
46
|
end
|
43
47
|
|
44
48
|
def self.normalize_filepath(path, check_not_directory: true)
|
45
|
-
path = File.expand_path(path)
|
49
|
+
path = File.expand_path(path) if !path.is_a?(::String) || path.start_with?("~")
|
46
50
|
if check_not_directory && File.exist?(path) && Dir.exist?(path)
|
47
51
|
raise ArgumentError, "Expected a file path; #{path} is a directory"
|
48
52
|
end
|
data/lib/polars/utils.rb
CHANGED
@@ -3,12 +3,18 @@ module Polars
|
|
3
3
|
module Utils
|
4
4
|
DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
def self.is_polars_dtype(dtype, include_unknown: false)
|
7
|
+
is_dtype = dtype.is_a?(Symbol) || dtype.is_a?(::String) || dtype.is_a?(DataType) || (dtype.is_a?(Class) && dtype < DataType)
|
8
|
+
|
9
|
+
if !include_unknown
|
10
|
+
is_dtype && dtype != Unknown
|
11
|
+
else
|
12
|
+
is_dtype
|
10
13
|
end
|
11
|
-
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.is_column(obj)
|
17
|
+
obj.is_a?(Expr) && obj.meta.is_column
|
12
18
|
end
|
13
19
|
|
14
20
|
def self.map_rb_type_to_dtype(ruby_dtype)
|
@@ -100,6 +106,27 @@ module Polars
|
|
100
106
|
end
|
101
107
|
end
|
102
108
|
|
109
|
+
def self.is_selector(obj)
|
110
|
+
obj.is_a?(Selectors::SelectorProxy)
|
111
|
+
end
|
112
|
+
|
113
|
+
def self.expand_selector(target, selector, strict: true)
|
114
|
+
if target.is_a?(Hash)
|
115
|
+
target = DataFrame.new(schema: target)
|
116
|
+
end
|
117
|
+
|
118
|
+
if !is_selector(selector) && !is_polars_dtype(selector)
|
119
|
+
msg = "expected a selector; found #{selector.inspect} instead."
|
120
|
+
raise TypeError, msg
|
121
|
+
end
|
122
|
+
|
123
|
+
if is_selector(selector)
|
124
|
+
target.select(selector).columns
|
125
|
+
else
|
126
|
+
target.select(Polars.col(selector)).columns
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
103
130
|
def self._expand_selectors(frame, *items)
|
104
131
|
items_iter = _parse_inputs_as_iterable(items)
|
105
132
|
|
@@ -115,16 +142,24 @@ module Polars
|
|
115
142
|
expanded
|
116
143
|
end
|
117
144
|
|
118
|
-
# TODO
|
119
|
-
def self.is_selector(obj)
|
120
|
-
false
|
121
|
-
end
|
122
|
-
|
123
145
|
def self.parse_interval_argument(interval)
|
124
146
|
if interval.include?(" ")
|
125
147
|
interval = interval.gsub(" ", "")
|
126
148
|
end
|
127
149
|
interval.downcase
|
128
150
|
end
|
151
|
+
|
152
|
+
def self.parse_into_dtype(input)
|
153
|
+
if is_polars_dtype(input)
|
154
|
+
input
|
155
|
+
else
|
156
|
+
raise Todo
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
def self.re_escape(s)
|
161
|
+
# escapes _only_ those metachars with meaning to the rust regex crate
|
162
|
+
Plr.re_escape(s)
|
163
|
+
end
|
129
164
|
end
|
130
165
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -24,12 +24,12 @@ require_relative "polars/convert"
|
|
24
24
|
require_relative "polars/plot"
|
25
25
|
require_relative "polars/data_frame"
|
26
26
|
require_relative "polars/data_types"
|
27
|
+
require_relative "polars/data_type_group"
|
27
28
|
require_relative "polars/date_time_expr"
|
28
29
|
require_relative "polars/date_time_name_space"
|
29
30
|
require_relative "polars/dynamic_group_by"
|
30
31
|
require_relative "polars/exceptions"
|
31
32
|
require_relative "polars/expr"
|
32
|
-
require_relative "polars/functions"
|
33
33
|
require_relative "polars/functions/as_datatype"
|
34
34
|
require_relative "polars/functions/col"
|
35
35
|
require_relative "polars/functions/eager"
|
@@ -60,6 +60,7 @@ require_relative "polars/list_name_space"
|
|
60
60
|
require_relative "polars/meta_expr"
|
61
61
|
require_relative "polars/name_expr"
|
62
62
|
require_relative "polars/rolling_group_by"
|
63
|
+
require_relative "polars/selectors"
|
63
64
|
require_relative "polars/series"
|
64
65
|
require_relative "polars/slice"
|
65
66
|
require_relative "polars/sql_context"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.14.0
|
4
|
+
version: 0.15.0
|
5
5
|
platform: arm64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -53,6 +53,7 @@ files:
|
|
53
53
|
- lib/polars/config.rb
|
54
54
|
- lib/polars/convert.rb
|
55
55
|
- lib/polars/data_frame.rb
|
56
|
+
- lib/polars/data_type_group.rb
|
56
57
|
- lib/polars/data_types.rb
|
57
58
|
- lib/polars/date_time_expr.rb
|
58
59
|
- lib/polars/date_time_name_space.rb
|
@@ -60,7 +61,6 @@ files:
|
|
60
61
|
- lib/polars/exceptions.rb
|
61
62
|
- lib/polars/expr.rb
|
62
63
|
- lib/polars/expr_dispatch.rb
|
63
|
-
- lib/polars/functions.rb
|
64
64
|
- lib/polars/functions/aggregation/horizontal.rb
|
65
65
|
- lib/polars/functions/aggregation/vertical.rb
|
66
66
|
- lib/polars/functions/as_datatype.rb
|
@@ -92,6 +92,7 @@ files:
|
|
92
92
|
- lib/polars/name_expr.rb
|
93
93
|
- lib/polars/plot.rb
|
94
94
|
- lib/polars/rolling_group_by.rb
|
95
|
+
- lib/polars/selectors.rb
|
95
96
|
- lib/polars/series.rb
|
96
97
|
- lib/polars/slice.rb
|
97
98
|
- lib/polars/sql_context.rb
|
data/lib/polars/functions.rb
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
module Polars
|
2
|
-
module Functions
|
3
|
-
# Convert categorical variables into dummy/indicator variables.
|
4
|
-
#
|
5
|
-
# @param df [DataFrame]
|
6
|
-
# DataFrame to convert.
|
7
|
-
# @param columns [Array, nil]
|
8
|
-
# A subset of columns to convert to dummy variables. `nil` means
|
9
|
-
# "all columns".
|
10
|
-
#
|
11
|
-
# @return [DataFrame]
|
12
|
-
def get_dummies(df, columns: nil)
|
13
|
-
df.to_dummies(columns: columns)
|
14
|
-
end
|
15
|
-
|
16
|
-
# Aggregate to list.
|
17
|
-
#
|
18
|
-
# @return [Expr]
|
19
|
-
def to_list(name)
|
20
|
-
col(name).list
|
21
|
-
end
|
22
|
-
|
23
|
-
# Compute the spearman rank correlation between two columns.
|
24
|
-
#
|
25
|
-
# Missing data will be excluded from the computation.
|
26
|
-
#
|
27
|
-
# @param a [Object]
|
28
|
-
# Column name or Expression.
|
29
|
-
# @param b [Object]
|
30
|
-
# Column name or Expression.
|
31
|
-
# @param ddof [Integer]
|
32
|
-
# Delta degrees of freedom
|
33
|
-
# @param propagate_nans [Boolean]
|
34
|
-
# If `True` any `NaN` encountered will lead to `NaN` in the output.
|
35
|
-
# Defaults to `False` where `NaN` are regarded as larger than any finite number
|
36
|
-
# and thus lead to the highest rank.
|
37
|
-
#
|
38
|
-
# @return [Expr]
|
39
|
-
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
|
40
|
-
corr(a, b, method: "spearman", ddof: ddof, propagate_nans: propagate_nans)
|
41
|
-
end
|
42
|
-
|
43
|
-
# Compute the pearson's correlation between two columns.
|
44
|
-
#
|
45
|
-
# @param a [Object]
|
46
|
-
# Column name or Expression.
|
47
|
-
# @param b [Object]
|
48
|
-
# Column name or Expression.
|
49
|
-
# @param ddof [Integer]
|
50
|
-
# Delta degrees of freedom
|
51
|
-
#
|
52
|
-
# @return [Expr]
|
53
|
-
def pearson_corr(a, b, ddof: 1)
|
54
|
-
corr(a, b, method: "pearson", ddof: ddof)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|