polars-df 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +73 -3
- data/Cargo.toml +3 -0
- data/ext/polars/Cargo.toml +12 -1
- data/ext/polars/src/conversion.rs +80 -0
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +2 -2
- data/ext/polars/src/lazy/dsl.rs +98 -0
- data/ext/polars/src/lib.rs +34 -0
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +35 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +101 -4
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/expr.rb +3774 -58
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/group_by.rb +1 -0
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +8 -4
- data/lib/polars/lazy_functions.rb +126 -16
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/series.rb +802 -52
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +28 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -0
- metadata +8 -2
data/lib/polars/expr_dispatch.rb
CHANGED
@@ -3,11 +3,19 @@ module Polars
|
|
3
3
|
module ExprDispatch
|
4
4
|
private
|
5
5
|
|
6
|
+
def self.included(base)
|
7
|
+
base.attr_accessor :_s
|
8
|
+
base.singleton_class.attr_accessor :_accessor
|
9
|
+
end
|
10
|
+
|
6
11
|
def method_missing(method, ...)
|
7
12
|
return super unless self.class.method_defined?(method)
|
8
13
|
|
14
|
+
namespace = self.class._accessor
|
15
|
+
|
9
16
|
s = Utils.wrap_s(_s)
|
10
17
|
expr = Utils.col(s.name)
|
18
|
+
expr = expr.send(namespace) if namespace
|
11
19
|
s.to_frame.select(expr.send(method, ...)).to_series
|
12
20
|
end
|
13
21
|
end
|
data/lib/polars/group_by.rb
CHANGED
data/lib/polars/io.rb
CHANGED
@@ -183,7 +183,7 @@ module Polars
|
|
183
183
|
# @param has_header [Boolean]
|
184
184
|
# Indicate if the first row of dataset is a header or not.
|
185
185
|
# If set to false, column names will be autogenerated in the
|
186
|
-
# following format:
|
186
|
+
# following format: `column_x`, with `x` being an
|
187
187
|
# enumeration over every column in the dataset starting at 1.
|
188
188
|
# @param sep [String]
|
189
189
|
# Single byte character to use as delimiter in the file.
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -558,7 +558,7 @@ module Polars
|
|
558
558
|
# "ham" => ["a", "b", "c"]
|
559
559
|
# }
|
560
560
|
# ).lazy
|
561
|
-
# lf.filter(Polars.col("foo") < 3).collect
|
561
|
+
# lf.filter(Polars.col("foo") < 3).collect
|
562
562
|
# # =>
|
563
563
|
# # shape: (2, 3)
|
564
564
|
# # ┌─────┬─────┬─────┐
|
@@ -910,7 +910,7 @@ module Polars
|
|
910
910
|
# [
|
911
911
|
# (Polars.col("a") ** 2).alias("a^2"),
|
912
912
|
# (Polars.col("b") / 2).alias("b/2"),
|
913
|
-
# (Polars.col("c").is_not
|
913
|
+
# (Polars.col("c").is_not).alias("not c")
|
914
914
|
# ]
|
915
915
|
# ).collect
|
916
916
|
# # =>
|
@@ -1256,8 +1256,12 @@ module Polars
|
|
1256
1256
|
select(Utils.col("*").take_every(n))
|
1257
1257
|
end
|
1258
1258
|
|
1259
|
-
#
|
1260
|
-
#
|
1259
|
+
# Fill null values using the specified value or strategy.
|
1260
|
+
#
|
1261
|
+
# @return [LazyFrame]
|
1262
|
+
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
|
1263
|
+
select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
|
1264
|
+
end
|
1261
1265
|
|
1262
1266
|
# Fill floating point NaN values.
|
1263
1267
|
#
|
data/lib/polars/lazy_functions.rb
CHANGED
@@ -191,8 +191,16 @@ module Polars
|
|
191
191
|
end
|
192
192
|
end
|
193
193
|
|
194
|
-
#
|
195
|
-
#
|
194
|
+
# Count unique values.
|
195
|
+
#
|
196
|
+
# @return [Object]
|
197
|
+
def n_unique(column)
|
198
|
+
if column.is_a?(Series)
|
199
|
+
column.n_unique
|
200
|
+
else
|
201
|
+
col(column).n_unique
|
202
|
+
end
|
203
|
+
end
|
196
204
|
|
197
205
|
# Get the first value.
|
198
206
|
#
|
@@ -213,14 +221,61 @@ module Polars
|
|
213
221
|
end
|
214
222
|
end
|
215
223
|
|
216
|
-
#
|
217
|
-
#
|
224
|
+
# Get the last value.
|
225
|
+
#
|
226
|
+
# Depending on the input type this function does different things:
|
227
|
+
#
|
228
|
+
# - nil -> expression to take last column of a context.
|
229
|
+
# - String -> syntactic sugar for `Polars.col(..).last`
|
230
|
+
# - Series -> Take last value in `Series`
|
231
|
+
#
|
232
|
+
# @return [Object]
|
233
|
+
def last(column = nil)
|
234
|
+
if column.nil?
|
235
|
+
return Utils.wrap_expr(_last)
|
236
|
+
end
|
218
237
|
|
219
|
-
|
220
|
-
|
238
|
+
if column.is_a?(Series)
|
239
|
+
if column.len > 0
|
240
|
+
return column[-1]
|
241
|
+
else
|
242
|
+
raise IndexError, "The series is empty, so no last value can be returned"
|
243
|
+
end
|
244
|
+
end
|
245
|
+
col(column).last
|
246
|
+
end
|
221
247
|
|
222
|
-
#
|
223
|
-
#
|
248
|
+
# Get the first `n` rows.
|
249
|
+
#
|
250
|
+
# @param column [Object]
|
251
|
+
# Column name or Series.
|
252
|
+
# @param n [Integer]
|
253
|
+
# Number of rows to return.
|
254
|
+
#
|
255
|
+
# @return [Object]
|
256
|
+
def head(column, n = 10)
|
257
|
+
if column.is_a?(Series)
|
258
|
+
column.head(n)
|
259
|
+
else
|
260
|
+
col(column).head(n)
|
261
|
+
end
|
262
|
+
end
|
263
|
+
|
264
|
+
# Get the last `n` rows.
|
265
|
+
#
|
266
|
+
# @param column [Object]
|
267
|
+
# Column name or Series.
|
268
|
+
# @param n [Integer]
|
269
|
+
# Number of rows to return.
|
270
|
+
#
|
271
|
+
# @return [Object]
|
272
|
+
def tail(column, n = 10)
|
273
|
+
if column.is_a?(Series)
|
274
|
+
column.tail(n)
|
275
|
+
else
|
276
|
+
col(column).tail(n)
|
277
|
+
end
|
278
|
+
end
|
224
279
|
|
225
280
|
# Return an expression representing a literal value.
|
226
281
|
#
|
@@ -242,14 +297,69 @@ module Polars
|
|
242
297
|
# def cumsum
|
243
298
|
# end
|
244
299
|
|
245
|
-
#
|
246
|
-
#
|
300
|
+
# Compute the Spearman rank correlation between two columns.
|
301
|
+
#
|
302
|
+
# Missing data will be excluded from the computation.
|
303
|
+
#
|
304
|
+
# @param a [Object]
|
305
|
+
# Column name or Expression.
|
306
|
+
# @param b [Object]
|
307
|
+
# Column name or Expression.
|
308
|
+
# @param ddof [Integer]
|
309
|
+
# Delta degrees of freedom
|
310
|
+
# @param propagate_nans [Boolean]
|
311
|
+
# If `true`, any `NaN` encountered will lead to `NaN` in the output.
|
312
|
+
# Defaults to `false`, where `NaN` values are regarded as larger than any finite number
|
313
|
+
# and thus lead to the highest rank.
|
314
|
+
#
|
315
|
+
# @return [Expr]
|
316
|
+
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
|
317
|
+
if a.is_a?(String)
|
318
|
+
a = col(a)
|
319
|
+
end
|
320
|
+
if b.is_a?(String)
|
321
|
+
b = col(b)
|
322
|
+
end
|
323
|
+
Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
|
324
|
+
end
|
247
325
|
|
248
|
-
#
|
249
|
-
#
|
326
|
+
# Compute the Pearson correlation between two columns.
|
327
|
+
#
|
328
|
+
# @param a [Object]
|
329
|
+
# Column name or Expression.
|
330
|
+
# @param b [Object]
|
331
|
+
# Column name or Expression.
|
332
|
+
# @param ddof [Integer]
|
333
|
+
# Delta degrees of freedom
|
334
|
+
#
|
335
|
+
# @return [Expr]
|
336
|
+
def pearson_corr(a, b, ddof: 1)
|
337
|
+
if a.is_a?(String)
|
338
|
+
a = col(a)
|
339
|
+
end
|
340
|
+
if b.is_a?(String)
|
341
|
+
b = col(b)
|
342
|
+
end
|
343
|
+
Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
|
344
|
+
end
|
250
345
|
|
251
|
-
#
|
252
|
-
#
|
346
|
+
# Compute the covariance between two columns/expressions.
|
347
|
+
#
|
348
|
+
# @param a [Object]
|
349
|
+
# Column name or Expression.
|
350
|
+
# @param b [Object]
|
351
|
+
# Column name or Expression.
|
352
|
+
#
|
353
|
+
# @return [Expr]
|
354
|
+
def cov(a, b)
|
355
|
+
if a.is_a?(String)
|
356
|
+
a = col(a)
|
357
|
+
end
|
358
|
+
if b.is_a?(String)
|
359
|
+
b = col(b)
|
360
|
+
end
|
361
|
+
Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
|
362
|
+
end
|
253
363
|
|
254
364
|
# def map
|
255
365
|
# end
|
@@ -408,7 +518,7 @@ module Polars
|
|
408
518
|
# "bool" => [true, nil],
|
409
519
|
# "list" => [[1, 2], [3]],
|
410
520
|
# }
|
411
|
-
# ).select([Polars.struct(Polars.all
|
521
|
+
# ).select([Polars.struct(Polars.all).alias("my_struct")])
|
412
522
|
# # =>
|
413
523
|
# # shape: (2, 1)
|
414
524
|
# # ┌─────────────────────┐
|
@@ -425,7 +535,7 @@ module Polars
|
|
425
535
|
# df = Polars::DataFrame.new(
|
426
536
|
# {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
|
427
537
|
# )
|
428
|
-
# df.with_column(
|
538
|
+
# df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
|
429
539
|
# # =>
|
430
540
|
# # shape: (4, 4)
|
431
541
|
# # ┌─────┬───────┬─────┬─────────────┐
|