polars-df 0.21.0 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion/mod.rs +92 -4
- data/ext/polars/src/exceptions.rs +1 -0
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +24 -1
- data/ext/polars/src/expr/datetime.rs +58 -0
- data/ext/polars/src/expr/general.rs +84 -5
- data/ext/polars/src/expr/list.rs +24 -0
- data/ext/polars/src/expr/meta.rs +11 -0
- data/ext/polars/src/expr/mod.rs +1 -0
- data/ext/polars/src/expr/name.rs +8 -0
- data/ext/polars/src/expr/rolling.rs +20 -0
- data/ext/polars/src/expr/string.rs +59 -0
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/functions/io.rs +19 -0
- data/ext/polars/src/functions/lazy.rs +4 -0
- data/ext/polars/src/lazyframe/general.rs +51 -0
- data/ext/polars/src/lib.rs +119 -10
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/series.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +44 -0
- data/ext/polars/src/series/general.rs +64 -4
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +138 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +6 -6
- data/lib/polars/data_frame.rb +684 -19
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +14 -2
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/expr.rb +1213 -180
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +13 -0
- data/lib/polars/io/csv.rb +1 -1
- data/lib/polars/io/json.rb +4 -4
- data/lib/polars/io/ndjson.rb +4 -4
- data/lib/polars/io/parquet.rb +27 -5
- data/lib/polars/lazy_frame.rb +936 -20
- data/lib/polars/list_expr.rb +196 -4
- data/lib/polars/list_name_space.rb +201 -4
- data/lib/polars/meta_expr.rb +64 -0
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/schema.rb +79 -3
- data/lib/polars/selector.rb +72 -0
- data/lib/polars/selectors.rb +3 -3
- data/lib/polars/series.rb +1051 -54
- data/lib/polars/string_expr.rb +411 -6
- data/lib/polars/string_name_space.rb +722 -49
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -0
- metadata +4 -1
data/lib/polars/schema.rb
CHANGED
@@ -1,34 +1,110 @@
|
|
1
1
|
module Polars
  class Schema
    # Ordered mapping of column names to their data type.
    #
    # @param schema [Object]
    #   The schema definition given by column names and their associated
    #   Polars data type. Accepts a mapping or an enumerable of arrays.
    # @param check_dtypes [Boolean]
    #   When `false`, every value is stored exactly as given. When `true`
    #   (the default), Polars data types are validated (and data type classes
    #   are instantiated), while any other value is stored through {#[]=}.
    #
    # @raise [TypeError]
    #   If `check_dtypes` is true and a data type class is nested, decimal,
    #   or has an initializer with required arguments.
    def initialize(schema = nil, check_dtypes: true)
      input = schema || {}
      @schema = {}
      input.each do |name, tp|
        if !check_dtypes
          @schema[name] = tp
        elsif Utils.is_polars_dtype(tp)
          @schema[name] = _check_dtype(tp)
        else
          self[name] = tp
        end
      end
    end

    # Returns the data type of the column.
    #
    # @return [Object]
    def [](key)
      @schema[key]
    end

    # Sets the data type of the column.
    #
    # @return [Object]
    def []=(name, dtype)
      # TODO check dtype
      @schema[name] = dtype
    end

    # Get the column names of the schema.
    #
    # @return [Array]
    #
    # @example
    #   s = Polars::Schema.new({"x" => Polars::Float64.new, "y" => Polars::Datetime.new(time_zone: "UTC")})
    #   s.names
    #   # => ["x", "y"]
    def names
      @schema.keys
    end

    # Get the data types of the schema.
    #
    # @return [Array]
    #
    # @example
    #   s = Polars::Schema.new({"x" => Polars::UInt8.new, "y" => Polars::List.new(Polars::UInt8)})
    #   s.dtypes
    #   # => [Polars::UInt8, Polars::List(Polars::UInt8)]
    def dtypes
      @schema.values
    end

    # Get the number of schema entries.
    #
    # @return [Integer]
    #
    # @example
    #   s = Polars::Schema.new({"x" => Polars::Int32.new, "y" => Polars::List.new(Polars::String)})
    #   s.length
    #   # => 2
    def length
      @schema.length
    end

    # Returns a string representing the Schema.
    #
    # @return [String]
    def to_s
      "#{self.class.name}(#{@schema})"
    end
    alias_method :inspect, :to_s

    # @private
    def include?(name)
      @schema.include?(name)
    end

    # @private
    def to_h
      @schema.to_h
    end

    private

    # Returns a data type instance for `tp`.
    #
    # Instances of DataType are returned unchanged. Anything else is treated
    # as a data type class and instantiated with no arguments, which is only
    # possible when the class is fully specified without parameters.
    def _check_dtype(tp)
      return tp if tp.is_a?(DataType)

      # note: if nested/decimal, or has signature params, this implies required args
      if tp.nested? || tp.decimal? || _required_init_args(tp)
        msg = "dtypes must be fully-specified, got: #{tp.inspect}"
        raise TypeError, msg
      end
      tp.new
    end

    # Whether the initializer of class `tp` has required arguments.
    #
    # The arity is read from `initialize` rather than from `new`: `Class#new`
    # accepts a variable number of arguments, so its arity is -1 whatever the
    # initializer requires and would never signal a required argument.
    def _required_init_args(tp)
      arity = tp.instance_method(:initialize).arity
      # arity > 0: only required arguments; arity < -1: required plus optional ones
      arity > 0 || arity < -1
    end
  end
end
|
data/lib/polars/selector.rb
CHANGED
@@ -12,6 +12,9 @@ module Polars
|
|
12
12
|
slf
|
13
13
|
end
|
14
14
|
|
15
|
+
# Returns a string representing the Selector.
|
16
|
+
#
|
17
|
+
# @return [String]
|
15
18
|
def inspect
  # Wrap the underlying expression and reuse its string form.
  wrapped = Expr._from_rbexpr(_rbexpr)
  wrapped.to_s
end
|
@@ -50,10 +53,16 @@ module Polars
|
|
50
53
|
_from_rbselector(RbSelector.by_name(names, strict))
|
51
54
|
end
|
52
55
|
|
56
|
+
# Invert the selector.
|
57
|
+
#
|
58
|
+
# @return [Selector]
|
53
59
|
def ~
  # Inversion is expressed as "every column" minus this selector.
  everything = Selectors.all
  everything - self
end
|
56
62
|
|
63
|
+
# AND.
|
64
|
+
#
|
65
|
+
# @return [Selector]
|
57
66
|
def &(other)
|
58
67
|
if Utils.is_column(other)
|
59
68
|
colname = other.meta.output_name
|
@@ -68,6 +77,9 @@ module Polars
|
|
68
77
|
end
|
69
78
|
end
|
70
79
|
|
80
|
+
# OR.
|
81
|
+
#
|
82
|
+
# @return [Selector]
|
71
83
|
def |(other)
|
72
84
|
if Utils.is_column(other)
|
73
85
|
other = by_name(other.meta.output_name)
|
@@ -81,6 +93,9 @@ module Polars
|
|
81
93
|
end
|
82
94
|
end
|
83
95
|
|
96
|
+
# Difference.
|
97
|
+
#
|
98
|
+
# @return [Selector]
|
84
99
|
def -(other)
|
85
100
|
if Utils.is_selector(other)
|
86
101
|
Selector._from_rbselector(
|
@@ -91,6 +106,9 @@ module Polars
|
|
91
106
|
end
|
92
107
|
end
|
93
108
|
|
109
|
+
# XOR.
|
110
|
+
#
|
111
|
+
# @return [Selector]
|
94
112
|
def ^(other)
|
95
113
|
if Utils.is_column(other)
|
96
114
|
other = by_name(other.meta.output_name)
|
@@ -104,6 +122,19 @@ module Polars
|
|
104
122
|
end
|
105
123
|
end
|
106
124
|
|
125
|
+
# Exclude columns from a multi-column expression.
|
126
|
+
#
|
127
|
+
# Only works after a wildcard or regex column selection, and you cannot provide
|
128
|
+
# both string column names *and* dtypes (you may prefer to use selectors instead).
|
129
|
+
#
|
130
|
+
# @return [Selector]
|
131
|
+
#
|
132
|
+
# @param columns [Object]
|
133
|
+
# The name or datatype of the column(s) to exclude. Accepts regular expression
|
134
|
+
# input. Regular expressions should start with `^` and end with `$`.
|
135
|
+
# @param more_columns [Array]
|
136
|
+
# Additional names or datatypes of columns to exclude, specified as positional
|
137
|
+
# arguments.
|
107
138
|
def exclude(columns, *more_columns)
|
108
139
|
exclude_cols = []
|
109
140
|
exclude_dtypes = []
|
@@ -131,6 +162,47 @@ module Polars
|
|
131
162
|
end
|
132
163
|
end
|
133
164
|
|
165
|
+
# Materialize the `selector` as a normal expression.
|
166
|
+
#
|
167
|
+
# This ensures that the operators `|`, `&`, `~` and `-`
|
168
|
+
# are applied on the data and not on the selector sets.
|
169
|
+
#
|
170
|
+
# @return [Expr]
|
171
|
+
#
|
172
|
+
# @example Inverting the boolean selector will choose the non-boolean columns:
|
173
|
+
# df = Polars::DataFrame.new(
|
174
|
+
# {
|
175
|
+
# "colx" => ["aa", "bb", "cc"],
|
176
|
+
# "coly" => [true, false, true],
|
177
|
+
# "colz" => [1, 2, 3]
|
178
|
+
# }
|
179
|
+
# )
|
180
|
+
# df.select(~Polars.cs.boolean)
|
181
|
+
# # =>
|
182
|
+
# # shape: (3, 2)
|
183
|
+
# # ┌──────┬──────┐
|
184
|
+
# # │ colx ┆ colz │
|
185
|
+
# # │ --- ┆ --- │
|
186
|
+
# # │ str ┆ i64 │
|
187
|
+
# # ╞══════╪══════╡
|
188
|
+
# # │ aa ┆ 1 │
|
189
|
+
# # │ bb ┆ 2 │
|
190
|
+
# # │ cc ┆ 3 │
|
191
|
+
# # └──────┴──────┘
|
192
|
+
#
|
193
|
+
# @example To invert the *values* in the selected boolean columns, we need to materialize the selector as a standard expression instead:
|
194
|
+
# df.select(~Polars.cs.boolean.as_expr)
|
195
|
+
# # =>
|
196
|
+
# # shape: (3, 1)
|
197
|
+
# # ┌───────┐
|
198
|
+
# # │ coly │
|
199
|
+
# # │ --- │
|
200
|
+
# # │ bool │
|
201
|
+
# # ╞═══════╡
|
202
|
+
# # │ false │
|
203
|
+
# # │ true │
|
204
|
+
# # │ false │
|
205
|
+
# # └───────┘
|
134
206
|
def as_expr
  # Hand the underlying expression to Expr so the result is a plain expression.
  underlying = _rbexpr
  Expr._from_rbexpr(underlying)
end
|
data/lib/polars/selectors.rb
CHANGED
@@ -287,7 +287,7 @@ module Polars
|
|
287
287
|
# # │ b"hello" ┆ world ┆ b"!" ┆ :) │
|
288
288
|
# # └──────────┴───────┴────────┴─────┘
|
289
289
|
#
|
290
|
-
# @example Select binary columns and export as a
|
290
|
+
# @example Select binary columns and export as a hash:
|
291
291
|
# df.select(Polars.cs.binary).to_h(as_series: false)
|
292
292
|
# # => {"a"=>["hello"], "c"=>["!"]}
|
293
293
|
#
|
@@ -628,7 +628,7 @@ module Polars
|
|
628
628
|
# # └──────┘
|
629
629
|
#
|
630
630
|
# @example Select all columns *except* for those that are enum:
|
631
|
-
# df.select(~Polars.cs.enum
|
631
|
+
# df.select(~Polars.cs.enum)
|
632
632
|
# # =>
|
633
633
|
# # shape: (2, 2)
|
634
634
|
# # ┌─────┬─────┐
|
@@ -928,7 +928,7 @@ module Polars
|
|
928
928
|
# # │ 456 ┆ 5.5 │
|
929
929
|
# # └─────┴─────┘
|
930
930
|
def self.categorical
  # Build the native categorical selector, then wrap it in a Selector.
  native = RbSelector.categorical
  Selector._from_rbselector(native)
end
|
933
933
|
|
934
934
|
# Select columns whose names contain the given literal substring(s).
|