polars-df 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +73 -3
- data/Cargo.toml +3 -0
- data/ext/polars/Cargo.toml +12 -1
- data/ext/polars/src/conversion.rs +80 -0
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +2 -2
- data/ext/polars/src/lazy/dsl.rs +98 -0
- data/ext/polars/src/lib.rs +34 -0
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +35 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +101 -4
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/expr.rb +3774 -58
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/group_by.rb +1 -0
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +8 -4
- data/lib/polars/lazy_functions.rb +126 -16
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/series.rb +802 -52
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +28 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -0
- metadata +8 -2
data/lib/polars/expr_dispatch.rb
CHANGED
@@ -3,11 +3,19 @@ module Polars
|
|
3
3
|
module ExprDispatch
|
4
4
|
private
|
5
5
|
|
6
|
+
def self.included(base)
|
7
|
+
base.attr_accessor :_s
|
8
|
+
base.singleton_class.attr_accessor :_accessor
|
9
|
+
end
|
10
|
+
|
6
11
|
def method_missing(method, ...)
|
7
12
|
return super unless self.class.method_defined?(method)
|
8
13
|
|
14
|
+
namespace = self.class._accessor
|
15
|
+
|
9
16
|
s = Utils.wrap_s(_s)
|
10
17
|
expr = Utils.col(s.name)
|
18
|
+
expr = expr.send(namespace) if namespace
|
11
19
|
s.to_frame.select(expr.send(method, ...)).to_series
|
12
20
|
end
|
13
21
|
end
|
data/lib/polars/group_by.rb
CHANGED
data/lib/polars/io.rb
CHANGED
@@ -183,7 +183,7 @@ module Polars
|
|
183
183
|
# @param has_header [Boolean]
|
184
184
|
# Indicate if the first row of dataset is a header or not.
|
185
185
|
# If set to false, column names will be autogenerated in the
|
186
|
-
# following format:
|
186
|
+
# following format: `column_x`, with `x` being an
|
187
187
|
# enumeration over every column in the dataset starting at 1.
|
188
188
|
# @param sep [String]
|
189
189
|
# Single byte character to use as delimiter in the file.
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -558,7 +558,7 @@ module Polars
|
|
558
558
|
# "ham" => ["a", "b", "c"]
|
559
559
|
# }
|
560
560
|
# ).lazy
|
561
|
-
# lf.filter(Polars.col("foo") < 3).collect
|
561
|
+
# lf.filter(Polars.col("foo") < 3).collect
|
562
562
|
# # =>
|
563
563
|
# # shape: (2, 3)
|
564
564
|
# # ┌─────┬─────┬─────┐
|
@@ -910,7 +910,7 @@ module Polars
|
|
910
910
|
# [
|
911
911
|
# (Polars.col("a") ** 2).alias("a^2"),
|
912
912
|
# (Polars.col("b") / 2).alias("b/2"),
|
913
|
-
# (Polars.col("c").is_not
|
913
|
+
# (Polars.col("c").is_not).alias("not c")
|
914
914
|
# ]
|
915
915
|
# ).collect
|
916
916
|
# # =>
|
@@ -1256,8 +1256,12 @@ module Polars
|
|
1256
1256
|
select(Utils.col("*").take_every(n))
|
1257
1257
|
end
|
1258
1258
|
|
1259
|
-
#
|
1260
|
-
#
|
1259
|
+
# Fill null values using the specified value or strategy.
|
1260
|
+
#
|
1261
|
+
# @return [LazyFrame]
|
1262
|
+
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
|
1263
|
+
select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
|
1264
|
+
end
|
1261
1265
|
|
1262
1266
|
# Fill floating point NaN values.
|
1263
1267
|
#
|
data/lib/polars/lazy_functions.rb
CHANGED
@@ -191,8 +191,16 @@ module Polars
|
|
191
191
|
end
|
192
192
|
end
|
193
193
|
|
194
|
-
#
|
195
|
-
#
|
194
|
+
# Count unique values.
|
195
|
+
#
|
196
|
+
# @return [Object]
|
197
|
+
def n_unique(column)
|
198
|
+
if column.is_a?(Series)
|
199
|
+
column.n_unique
|
200
|
+
else
|
201
|
+
col(column).n_unique
|
202
|
+
end
|
203
|
+
end
|
196
204
|
|
197
205
|
# Get the first value.
|
198
206
|
#
|
@@ -213,14 +221,61 @@ module Polars
|
|
213
221
|
end
|
214
222
|
end
|
215
223
|
|
216
|
-
#
|
217
|
-
#
|
224
|
+
# Get the last value.
|
225
|
+
#
|
226
|
+
# Depending on the input type this function does different things:
|
227
|
+
#
|
228
|
+
# - nil -> expression to take last column of a context.
|
229
|
+
# - String -> syntactic sugar for `Polars.col(..).last`
|
230
|
+
# - Series -> Take last value in `Series`
|
231
|
+
#
|
232
|
+
# @return [Object]
|
233
|
+
def last(column = nil)
|
234
|
+
if column.nil?
|
235
|
+
return Utils.wrap_expr(_last)
|
236
|
+
end
|
218
237
|
|
219
|
-
|
220
|
-
|
238
|
+
if column.is_a?(Series)
|
239
|
+
if column.len > 0
|
240
|
+
return column[-1]
|
241
|
+
else
|
242
|
+
raise IndexError, "The series is empty, so no last value can be returned"
|
243
|
+
end
|
244
|
+
end
|
245
|
+
col(column).last
|
246
|
+
end
|
221
247
|
|
222
|
-
#
|
223
|
-
#
|
248
|
+
# Get the first `n` rows.
|
249
|
+
#
|
250
|
+
# @param column [Object]
|
251
|
+
# Column name or Series.
|
252
|
+
# @param n [Integer]
|
253
|
+
# Number of rows to return.
|
254
|
+
#
|
255
|
+
# @return [Object]
|
256
|
+
def head(column, n = 10)
|
257
|
+
if column.is_a?(Series)
|
258
|
+
column.head(n)
|
259
|
+
else
|
260
|
+
col(column).head(n)
|
261
|
+
end
|
262
|
+
end
|
263
|
+
|
264
|
+
# Get the last `n` rows.
|
265
|
+
#
|
266
|
+
# @param column [Object]
|
267
|
+
# Column name or Series.
|
268
|
+
# @param n [Integer]
|
269
|
+
# Number of rows to return.
|
270
|
+
#
|
271
|
+
# @return [Object]
|
272
|
+
def tail(column, n = 10)
|
273
|
+
if column.is_a?(Series)
|
274
|
+
column.tail(n)
|
275
|
+
else
|
276
|
+
col(column).tail(n)
|
277
|
+
end
|
278
|
+
end
|
224
279
|
|
225
280
|
# Return an expression representing a literal value.
|
226
281
|
#
|
@@ -242,14 +297,69 @@ module Polars
|
|
242
297
|
# def cumsum
|
243
298
|
# end
|
244
299
|
|
245
|
-
#
|
246
|
-
#
|
300
|
+
# Compute the Spearman rank correlation between two columns.
|
301
|
+
#
|
302
|
+
# Missing data will be excluded from the computation.
|
303
|
+
#
|
304
|
+
# @param a [Object]
|
305
|
+
# Column name or Expression.
|
306
|
+
# @param b [Object]
|
307
|
+
# Column name or Expression.
|
308
|
+
# @param ddof [Integer]
|
309
|
+
# Delta degrees of freedom
|
310
|
+
# @param propagate_nans [Boolean]
|
311
|
+
# If `true`, any `NaN` encountered will lead to `NaN` in the output.
|
312
|
+
# Defaults to `false`, where `NaN` values are regarded as larger than any finite number
|
313
|
+
# and thus lead to the highest rank.
|
314
|
+
#
|
315
|
+
# @return [Expr]
|
316
|
+
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
|
317
|
+
if a.is_a?(String)
|
318
|
+
a = col(a)
|
319
|
+
end
|
320
|
+
if b.is_a?(String)
|
321
|
+
b = col(b)
|
322
|
+
end
|
323
|
+
Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
|
324
|
+
end
|
247
325
|
|
248
|
-
#
|
249
|
-
#
|
326
|
+
# Compute the Pearson correlation between two columns.
|
327
|
+
#
|
328
|
+
# @param a [Object]
|
329
|
+
# Column name or Expression.
|
330
|
+
# @param b [Object]
|
331
|
+
# Column name or Expression.
|
332
|
+
# @param ddof [Integer]
|
333
|
+
# Delta degrees of freedom
|
334
|
+
#
|
335
|
+
# @return [Expr]
|
336
|
+
def pearson_corr(a, b, ddof: 1)
|
337
|
+
if a.is_a?(String)
|
338
|
+
a = col(a)
|
339
|
+
end
|
340
|
+
if b.is_a?(String)
|
341
|
+
b = col(b)
|
342
|
+
end
|
343
|
+
Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
|
344
|
+
end
|
250
345
|
|
251
|
-
#
|
252
|
-
#
|
346
|
+
# Compute the covariance between two columns/expressions.
|
347
|
+
#
|
348
|
+
# @param a [Object]
|
349
|
+
# Column name or Expression.
|
350
|
+
# @param b [Object]
|
351
|
+
# Column name or Expression.
|
352
|
+
#
|
353
|
+
# @return [Expr]
|
354
|
+
def cov(a, b)
|
355
|
+
if a.is_a?(String)
|
356
|
+
a = col(a)
|
357
|
+
end
|
358
|
+
if b.is_a?(String)
|
359
|
+
b = col(b)
|
360
|
+
end
|
361
|
+
Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
|
362
|
+
end
|
253
363
|
|
254
364
|
# def map
|
255
365
|
# end
|
@@ -408,7 +518,7 @@ module Polars
|
|
408
518
|
# "bool" => [true, nil],
|
409
519
|
# "list" => [[1, 2], [3]],
|
410
520
|
# }
|
411
|
-
# ).select([Polars.struct(Polars.all
|
521
|
+
# ).select([Polars.struct(Polars.all).alias("my_struct")])
|
412
522
|
# # =>
|
413
523
|
# # shape: (2, 1)
|
414
524
|
# # ┌─────────────────────┐
|
@@ -425,7 +535,7 @@ module Polars
|
|
425
535
|
# df = Polars::DataFrame.new(
|
426
536
|
# {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
|
427
537
|
# )
|
428
|
-
# df.with_column(
|
538
|
+
# df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
|
429
539
|
# # =>
|
430
540
|
# # shape: (4, 4)
|
431
541
|
# # ┌─────┬───────┬─────┬─────────────┐
|