polars-df 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +55 -48
- data/Cargo.toml +3 -0
- data/README.md +12 -0
- data/ext/polars/Cargo.toml +22 -11
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/catalog/unity.rs +96 -94
- data/ext/polars/src/conversion/any_value.rs +26 -30
- data/ext/polars/src/conversion/chunked_array.rs +32 -28
- data/ext/polars/src/conversion/datetime.rs +11 -0
- data/ext/polars/src/conversion/mod.rs +307 -34
- data/ext/polars/src/dataframe/construction.rs +4 -3
- data/ext/polars/src/dataframe/export.rs +17 -15
- data/ext/polars/src/dataframe/general.rs +15 -12
- data/ext/polars/src/dataframe/io.rs +1 -2
- data/ext/polars/src/dataframe/mod.rs +25 -1
- data/ext/polars/src/dataframe/serde.rs +23 -8
- data/ext/polars/src/exceptions.rs +8 -4
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +24 -1
- data/ext/polars/src/expr/datetime.rs +58 -14
- data/ext/polars/src/expr/general.rs +87 -15
- data/ext/polars/src/expr/list.rs +32 -24
- data/ext/polars/src/expr/meta.rs +15 -6
- data/ext/polars/src/expr/mod.rs +3 -0
- data/ext/polars/src/expr/name.rs +19 -14
- data/ext/polars/src/expr/rolling.rs +20 -0
- data/ext/polars/src/expr/serde.rs +28 -0
- data/ext/polars/src/expr/string.rs +64 -10
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/file.rs +15 -9
- data/ext/polars/src/functions/business.rs +0 -1
- data/ext/polars/src/functions/io.rs +25 -3
- data/ext/polars/src/functions/lazy.rs +11 -6
- data/ext/polars/src/functions/meta.rs +3 -3
- data/ext/polars/src/functions/string_cache.rs +3 -3
- data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
- data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
- data/ext/polars/src/io/mod.rs +6 -0
- data/ext/polars/src/lazyframe/general.rs +59 -9
- data/ext/polars/src/lazyframe/mod.rs +16 -1
- data/ext/polars/src/lazyframe/optflags.rs +58 -0
- data/ext/polars/src/lazyframe/serde.rs +27 -3
- data/ext/polars/src/lib.rs +261 -19
- data/ext/polars/src/map/dataframe.rs +20 -17
- data/ext/polars/src/map/lazy.rs +6 -5
- data/ext/polars/src/map/series.rs +8 -7
- data/ext/polars/src/on_startup.rs +12 -5
- data/ext/polars/src/rb_modules.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +85 -28
- data/ext/polars/src/series/construction.rs +1 -0
- data/ext/polars/src/series/export.rs +37 -33
- data/ext/polars/src/series/general.rs +120 -21
- data/ext/polars/src/series/mod.rs +29 -4
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +138 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +6 -6
- data/lib/polars/data_frame.rb +794 -27
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +26 -5
- data/lib/polars/date_time_expr.rb +252 -1
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/expr.rb +1248 -206
- data/lib/polars/functions/business.rb +95 -0
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +14 -1
- data/lib/polars/io/csv.rb +1 -1
- data/lib/polars/io/iceberg.rb +27 -0
- data/lib/polars/io/json.rb +4 -4
- data/lib/polars/io/ndjson.rb +4 -4
- data/lib/polars/io/parquet.rb +32 -7
- data/lib/polars/io/scan_options.rb +4 -1
- data/lib/polars/lazy_frame.rb +1028 -28
- data/lib/polars/list_expr.rb +217 -17
- data/lib/polars/list_name_space.rb +231 -22
- data/lib/polars/meta_expr.rb +89 -0
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/query_opt_flags.rb +50 -0
- data/lib/polars/scan_cast_options.rb +20 -1
- data/lib/polars/schema.rb +79 -3
- data/lib/polars/selector.rb +72 -0
- data/lib/polars/selectors.rb +3 -3
- data/lib/polars/series.rb +1053 -54
- data/lib/polars/string_expr.rb +436 -32
- data/lib/polars/string_name_space.rb +736 -50
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/serde.rb +17 -0
- data/lib/polars/utils/various.rb +22 -1
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +11 -1
data/lib/polars/struct_expr.rb
CHANGED
@@ -57,6 +57,36 @@ module Polars
|
|
57
57
|
Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
|
58
58
|
end
|
59
59
|
|
60
|
+
# Expand the struct into its individual fields.
|
61
|
+
#
|
62
|
+
# Alias for `Expr.struct.field("*")`.
|
63
|
+
#
|
64
|
+
# @return [Expr]
|
65
|
+
#
|
66
|
+
# @example
|
67
|
+
# df = Polars::DataFrame.new(
|
68
|
+
# {
|
69
|
+
# "aaa" => [1, 2],
|
70
|
+
# "bbb" => ["ab", "cd"],
|
71
|
+
# "ccc" => [true, nil],
|
72
|
+
# "ddd" => [[1, 2], [3]]
|
73
|
+
# }
|
74
|
+
# ).select(Polars.struct("aaa", "bbb", "ccc", "ddd").alias("struct_col"))
|
75
|
+
# df.select(Polars.col("struct_col").struct.unnest)
|
76
|
+
# # =>
|
77
|
+
# # shape: (2, 4)
|
78
|
+
# # ┌─────┬─────┬──────┬───────────┐
|
79
|
+
# # │ aaa ┆ bbb ┆ ccc ┆ ddd │
|
80
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
81
|
+
# # │ i64 ┆ str ┆ bool ┆ list[i64] │
|
82
|
+
# # ╞═════╪═════╪══════╪═══════════╡
|
83
|
+
# # │ 1 ┆ ab ┆ true ┆ [1, 2] │
|
84
|
+
# # │ 2 ┆ cd ┆ null ┆ [3] │
|
85
|
+
# # └─────┴─────┴──────┴───────────┘
|
86
|
+
def unnest
|
87
|
+
field("*")
|
88
|
+
end
|
89
|
+
|
60
90
|
# Rename the fields of the struct.
|
61
91
|
#
|
62
92
|
# @param names [Array]
|
@@ -94,5 +124,78 @@ module Polars
|
|
94
124
|
def rename_fields(names)
|
95
125
|
Utils.wrap_expr(_rbexpr.struct_rename_fields(names))
|
96
126
|
end
|
127
|
+
|
128
|
+
# Convert this struct to a string column with json values.
|
129
|
+
#
|
130
|
+
# @return [Expr]
|
131
|
+
#
|
132
|
+
# @example
|
133
|
+
# Polars::DataFrame.new(
|
134
|
+
# {"a" => [{"a" => [1, 2], "b" => [45]}, {"a" => [9, 1, 3], "b" => nil}]}
|
135
|
+
# ).with_columns(Polars.col("a").struct.json_encode.alias("encoded"))
|
136
|
+
# # =>
|
137
|
+
# # shape: (2, 2)
|
138
|
+
# # ┌──────────────────┬────────────────────────┐
|
139
|
+
# # │ a ┆ encoded │
|
140
|
+
# # │ --- ┆ --- │
|
141
|
+
# # │ struct[2] ┆ str │
|
142
|
+
# # ╞══════════════════╪════════════════════════╡
|
143
|
+
# # │ {[1, 2],[45]} ┆ {"a":[1,2],"b":[45]} │
|
144
|
+
# # │ {[9, 1, 3],null} ┆ {"a":[9,1,3],"b":null} │
|
145
|
+
# # └──────────────────┴────────────────────────┘
|
146
|
+
def json_encode
|
147
|
+
Utils.wrap_expr(_rbexpr.struct_json_encode)
|
148
|
+
end
|
149
|
+
|
150
|
+
# Add or overwrite fields of this struct.
|
151
|
+
#
|
152
|
+
# This is similar to `with_columns` on `DataFrame`.
|
153
|
+
#
|
154
|
+
# @param exprs [Array]
|
155
|
+
# Field(s) to add, specified as positional arguments.
|
156
|
+
# Accepts expression input. Strings are parsed as column names, other
|
157
|
+
# non-expression inputs are parsed as literals.
|
158
|
+
# @param named_exprs [Hash]
|
159
|
+
# Additional fields to add, specified as keyword arguments.
|
160
|
+
# The columns will be renamed to the keyword used.
|
161
|
+
#
|
162
|
+
# @return [Expr]
|
163
|
+
#
|
164
|
+
# @example
|
165
|
+
# df = Polars::DataFrame.new(
|
166
|
+
# {
|
167
|
+
# "coords" => [{"x" => 1, "y" => 4}, {"x" => 4, "y" => 9}, {"x" => 9, "y" => 16}],
|
168
|
+
# "multiply" => [10, 2, 3]
|
169
|
+
# }
|
170
|
+
# )
|
171
|
+
# df.with_columns(
|
172
|
+
# Polars.col("coords").struct.with_fields(
|
173
|
+
# Polars.field("x").sqrt,
|
174
|
+
# y_mul: Polars.field("y") * Polars.col("multiply")
|
175
|
+
# )
|
176
|
+
# )
|
177
|
+
# # =>
|
178
|
+
# # shape: (3, 2)
|
179
|
+
# # ┌─────────────┬──────────┐
|
180
|
+
# # │ coords ┆ multiply │
|
181
|
+
# # │ --- ┆ --- │
|
182
|
+
# # │ struct[3] ┆ i64 │
|
183
|
+
# # ╞═════════════╪══════════╡
|
184
|
+
# # │ {1.0,4,40} ┆ 10 │
|
185
|
+
# # │ {2.0,9,18} ┆ 2 │
|
186
|
+
# # │ {3.0,16,48} ┆ 3 │
|
187
|
+
# # └─────────────┴──────────┘
|
188
|
+
def with_fields(
|
189
|
+
*exprs,
|
190
|
+
**named_exprs
|
191
|
+
)
|
192
|
+
structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", 0).to_i != 0
|
193
|
+
|
194
|
+
rbexprs = Utils.parse_into_list_of_expressions(
|
195
|
+
*exprs, **named_exprs, __structify: structify
|
196
|
+
)
|
197
|
+
|
198
|
+
Utils.wrap_expr(_rbexpr.struct_with_fields(rbexprs))
|
199
|
+
end
|
97
200
|
end
|
98
201
|
end
|
data/lib/polars/struct_name_space.rb
CHANGED
@@ -80,7 +80,7 @@ module Polars
|
|
80
80
|
super
|
81
81
|
end
|
82
82
|
|
83
|
-
# Get the struct definition as a name/dtype schema
|
83
|
+
# Get the struct definition as a name/dtype schema hash.
|
84
84
|
#
|
85
85
|
# @return [Object]
|
86
86
|
#
|
@@ -116,5 +116,23 @@ module Polars
|
|
116
116
|
def unnest
|
117
117
|
Utils.wrap_df(_s.struct_unnest)
|
118
118
|
end
|
119
|
+
|
120
|
+
# Convert this struct to a string column with json values.
|
121
|
+
#
|
122
|
+
# @return [Series]
|
123
|
+
#
|
124
|
+
# @example
|
125
|
+
# s = Polars::Series.new("a", [{"a" => [1, 2], "b" => [45]}, {"a" => [9, 1, 3], "b" => nil}])
|
126
|
+
# s.struct.json_encode
|
127
|
+
# # =>
|
128
|
+
# # shape: (2,)
|
129
|
+
# # Series: 'a' [str]
|
130
|
+
# # [
|
131
|
+
# # "{"a":[1,2],"b":[45]}"
|
132
|
+
# # "{"a":[9,1,3],"b":null}"
|
133
|
+
# # ]
|
134
|
+
def json_encode
|
135
|
+
super
|
136
|
+
end
|
119
137
|
end
|
120
138
|
end
|
data/lib/polars/utils/serde.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
module Polars
|
2
|
+
module Utils
|
3
|
+
def self.serialize_polars_object(serializer, file)
|
4
|
+
serialize_to_bytes = lambda do
|
5
|
+
buf = StringIO.new
|
6
|
+
serializer.(buf)
|
7
|
+
buf.string
|
8
|
+
end
|
9
|
+
|
10
|
+
if file.nil?
|
11
|
+
return serialize_to_bytes.call
|
12
|
+
end
|
13
|
+
|
14
|
+
raise Todo
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/polars/utils/various.rb
CHANGED
@@ -16,7 +16,10 @@ module Polars
|
|
16
16
|
val.is_a?(::Array) && val.all? { |x| pathlike?(x) }
|
17
17
|
end
|
18
18
|
|
19
|
-
def self.is_bool_sequence(val)
|
19
|
+
def self.is_bool_sequence(val, include_series: false)
|
20
|
+
if include_series && val.is_a?(Series)
|
21
|
+
return val.dtype == Boolean
|
22
|
+
end
|
20
23
|
val.is_a?(::Array) && val.all? { |x| x == true || x == false }
|
21
24
|
end
|
22
25
|
|
@@ -24,6 +27,10 @@ module Polars
|
|
24
27
|
val.is_a?(::Array) && _is_iterable_of(val, Integer)
|
25
28
|
end
|
26
29
|
|
30
|
+
def self.is_sequence(val, include_series: false)
|
31
|
+
val.is_a?(::Array) || (include_series && val.is_a?(Series))
|
32
|
+
end
|
33
|
+
|
27
34
|
def self.is_str_sequence(val, allow_str: false)
|
28
35
|
if allow_str == false && val.is_a?(::String)
|
29
36
|
false
|
@@ -68,6 +75,10 @@ module Polars
|
|
68
75
|
end
|
69
76
|
end
|
70
77
|
|
78
|
+
def self._polars_warn(msg)
|
79
|
+
warn msg
|
80
|
+
end
|
81
|
+
|
71
82
|
def self.extend_bool(value, n_match, value_name, match_name)
|
72
83
|
values = bool?(value) ? [value] * n_match : value
|
73
84
|
if n_match != values.length
|
@@ -76,5 +87,15 @@ module Polars
|
|
76
87
|
end
|
77
88
|
values
|
78
89
|
end
|
90
|
+
|
91
|
+
def self.require_same_type(current, other)
|
92
|
+
if !other.is_a?(current.class) && !current.is_a?(other.class)
|
93
|
+
msg = (
|
94
|
+
"expected `other` to be a #{current.inspect}, " +
|
95
|
+
"not #{other.inspect}"
|
96
|
+
)
|
97
|
+
raise TypeError, msg
|
98
|
+
end
|
99
|
+
end
|
79
100
|
end
|
80
101
|
end
|
data/lib/polars/utils.rb
CHANGED
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -29,6 +29,7 @@ require_relative "polars/convert"
|
|
29
29
|
require_relative "polars/plot"
|
30
30
|
require_relative "polars/data_frame"
|
31
31
|
require_relative "polars/data_types"
|
32
|
+
require_relative "polars/data_type_expr"
|
32
33
|
require_relative "polars/data_type_group"
|
33
34
|
require_relative "polars/date_time_expr"
|
34
35
|
require_relative "polars/date_time_name_space"
|
@@ -36,7 +37,9 @@ require_relative "polars/dynamic_group_by"
|
|
36
37
|
require_relative "polars/exceptions"
|
37
38
|
require_relative "polars/expr"
|
38
39
|
require_relative "polars/functions/as_datatype"
|
40
|
+
require_relative "polars/functions/business"
|
39
41
|
require_relative "polars/functions/col"
|
42
|
+
require_relative "polars/functions/datatype"
|
40
43
|
require_relative "polars/functions/eager"
|
41
44
|
require_relative "polars/functions/lazy"
|
42
45
|
require_relative "polars/functions/len"
|
@@ -55,6 +58,7 @@ require_relative "polars/io/avro"
|
|
55
58
|
require_relative "polars/io/csv"
|
56
59
|
require_relative "polars/io/database"
|
57
60
|
require_relative "polars/io/delta"
|
61
|
+
require_relative "polars/io/iceberg"
|
58
62
|
require_relative "polars/io/ipc"
|
59
63
|
require_relative "polars/io/json"
|
60
64
|
require_relative "polars/io/ndjson"
|
@@ -66,6 +70,7 @@ require_relative "polars/list_expr"
|
|
66
70
|
require_relative "polars/list_name_space"
|
67
71
|
require_relative "polars/meta_expr"
|
68
72
|
require_relative "polars/name_expr"
|
73
|
+
require_relative "polars/query_opt_flags"
|
69
74
|
require_relative "polars/rolling_group_by"
|
70
75
|
require_relative "polars/scan_cast_options"
|
71
76
|
require_relative "polars/schema"
|
@@ -84,6 +89,7 @@ require_relative "polars/utils"
|
|
84
89
|
require_relative "polars/utils/constants"
|
85
90
|
require_relative "polars/utils/convert"
|
86
91
|
require_relative "polars/utils/parse"
|
92
|
+
require_relative "polars/utils/serde"
|
87
93
|
require_relative "polars/utils/various"
|
88
94
|
require_relative "polars/utils/wrap"
|
89
95
|
require_relative "polars/version"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.21.0
|
4
|
+
version: 0.22.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
@@ -58,6 +58,7 @@ files:
|
|
58
58
|
- ext/polars/src/conversion/any_value.rs
|
59
59
|
- ext/polars/src/conversion/categorical.rs
|
60
60
|
- ext/polars/src/conversion/chunked_array.rs
|
61
|
+
- ext/polars/src/conversion/datetime.rs
|
61
62
|
- ext/polars/src/conversion/mod.rs
|
62
63
|
- ext/polars/src/dataframe/construction.rs
|
63
64
|
- ext/polars/src/dataframe/export.rs
|
@@ -69,6 +70,7 @@ files:
|
|
69
70
|
- ext/polars/src/exceptions.rs
|
70
71
|
- ext/polars/src/expr/array.rs
|
71
72
|
- ext/polars/src/expr/binary.rs
|
73
|
+
- ext/polars/src/expr/bitwise.rs
|
72
74
|
- ext/polars/src/expr/categorical.rs
|
73
75
|
- ext/polars/src/expr/datatype.rs
|
74
76
|
- ext/polars/src/expr/datetime.rs
|
@@ -79,6 +81,7 @@ files:
|
|
79
81
|
- ext/polars/src/expr/name.rs
|
80
82
|
- ext/polars/src/expr/rolling.rs
|
81
83
|
- ext/polars/src/expr/selector.rs
|
84
|
+
- ext/polars/src/expr/serde.rs
|
82
85
|
- ext/polars/src/expr/string.rs
|
83
86
|
- ext/polars/src/expr/struct.rs
|
84
87
|
- ext/polars/src/file.rs
|
@@ -104,6 +107,7 @@ files:
|
|
104
107
|
- ext/polars/src/io/mod.rs
|
105
108
|
- ext/polars/src/lazyframe/general.rs
|
106
109
|
- ext/polars/src/lazyframe/mod.rs
|
110
|
+
- ext/polars/src/lazyframe/optflags.rs
|
107
111
|
- ext/polars/src/lazyframe/serde.rs
|
108
112
|
- ext/polars/src/lazyframe/sink.rs
|
109
113
|
- ext/polars/src/lazygroupby.rs
|
@@ -145,6 +149,7 @@ files:
|
|
145
149
|
- lib/polars/config.rb
|
146
150
|
- lib/polars/convert.rb
|
147
151
|
- lib/polars/data_frame.rb
|
152
|
+
- lib/polars/data_type_expr.rb
|
148
153
|
- lib/polars/data_type_group.rb
|
149
154
|
- lib/polars/data_types.rb
|
150
155
|
- lib/polars/date_time_expr.rb
|
@@ -156,7 +161,9 @@ files:
|
|
156
161
|
- lib/polars/functions/aggregation/horizontal.rb
|
157
162
|
- lib/polars/functions/aggregation/vertical.rb
|
158
163
|
- lib/polars/functions/as_datatype.rb
|
164
|
+
- lib/polars/functions/business.rb
|
159
165
|
- lib/polars/functions/col.rb
|
166
|
+
- lib/polars/functions/datatype.rb
|
160
167
|
- lib/polars/functions/eager.rb
|
161
168
|
- lib/polars/functions/lazy.rb
|
162
169
|
- lib/polars/functions/len.rb
|
@@ -173,6 +180,7 @@ files:
|
|
173
180
|
- lib/polars/io/csv.rb
|
174
181
|
- lib/polars/io/database.rb
|
175
182
|
- lib/polars/io/delta.rb
|
183
|
+
- lib/polars/io/iceberg.rb
|
176
184
|
- lib/polars/io/ipc.rb
|
177
185
|
- lib/polars/io/json.rb
|
178
186
|
- lib/polars/io/ndjson.rb
|
@@ -185,6 +193,7 @@ files:
|
|
185
193
|
- lib/polars/meta_expr.rb
|
186
194
|
- lib/polars/name_expr.rb
|
187
195
|
- lib/polars/plot.rb
|
196
|
+
- lib/polars/query_opt_flags.rb
|
188
197
|
- lib/polars/rolling_group_by.rb
|
189
198
|
- lib/polars/scan_cast_options.rb
|
190
199
|
- lib/polars/schema.rb
|
@@ -203,6 +212,7 @@ files:
|
|
203
212
|
- lib/polars/utils/constants.rb
|
204
213
|
- lib/polars/utils/convert.rb
|
205
214
|
- lib/polars/utils/parse.rb
|
215
|
+
- lib/polars/utils/serde.rb
|
206
216
|
- lib/polars/utils/various.rb
|
207
217
|
- lib/polars/utils/wrap.rb
|
208
218
|
- lib/polars/version.rb
|