polars-df 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,11 +3,19 @@ module Polars
3
3
  module ExprDispatch
4
4
  private
5
5
 
6
+ def self.included(base)
7
+ base.attr_accessor :_s
8
+ base.singleton_class.attr_accessor :_accessor
9
+ end
10
+
6
11
  def method_missing(method, ...)
7
12
  return super unless self.class.method_defined?(method)
8
13
 
14
+ namespace = self.class._accessor
15
+
9
16
  s = Utils.wrap_s(_s)
10
17
  expr = Utils.col(s.name)
18
+ expr = expr.send(namespace) if namespace
11
19
  s.to_frame.select(expr.send(method, ...)).to_series
12
20
  end
13
21
  end
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # Starts a new GroupBy operation.
2
3
  class GroupBy
3
4
  # @private
4
5
  attr_accessor :_df, :_dataframe_class, :by, :maintain_order
data/lib/polars/io.rb CHANGED
@@ -183,7 +183,7 @@ module Polars
183
183
  # @param has_header [Boolean]
184
184
  # Indicate if the first row of dataset is a header or not.
185
185
  # If set to false, column names will be autogenerated in the
186
- # following format: ``column_x``, with ``x`` being an
186
+ # following format: `column_x`, with `x` being an
187
187
  # enumeration over every column in the dataset starting at 1.
188
188
  # @param sep [String]
189
189
  # Single byte character to use as delimiter in the file.
@@ -558,7 +558,7 @@ module Polars
558
558
  # "ham" => ["a", "b", "c"]
559
559
  # }
560
560
  # ).lazy
561
- # lf.filter(Polars.col("foo") < 3).collect()
561
+ # lf.filter(Polars.col("foo") < 3).collect
562
562
  # # =>
563
563
  # # shape: (2, 3)
564
564
  # # ┌─────┬─────┬─────┐
@@ -910,7 +910,7 @@ module Polars
910
910
  # [
911
911
  # (Polars.col("a") ** 2).alias("a^2"),
912
912
  # (Polars.col("b") / 2).alias("b/2"),
913
- # (Polars.col("c").is_not()).alias("not c")
913
+ # (Polars.col("c").is_not).alias("not c")
914
914
  # ]
915
915
  # ).collect
916
916
  # # =>
@@ -1256,8 +1256,12 @@ module Polars
1256
1256
  select(Utils.col("*").take_every(n))
1257
1257
  end
1258
1258
 
1259
- # def fill_null
1260
- # end
1259
+ # Fill null values using the specified value or strategy.
1260
+ #
1261
+ # @return [LazyFrame]
1262
+ def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
1263
+ select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
1264
+ end
1261
1265
 
1262
1266
  # Fill floating point NaN values.
1263
1267
  #
@@ -191,8 +191,16 @@ module Polars
191
191
  end
192
192
  end
193
193
 
194
- # def n_unique
195
- # end
194
+ # Count unique values.
195
+ #
196
+ # @return [Object]
197
+ def n_unique(column)
198
+ if column.is_a?(Series)
199
+ column.n_unique
200
+ else
201
+ col(column).n_unique
202
+ end
203
+ end
196
204
 
197
205
  # Get the first value.
198
206
  #
@@ -213,14 +221,61 @@ module Polars
213
221
  end
214
222
  end
215
223
 
216
- # def last
217
- # end
224
+ # Get the last value.
225
+ #
226
+ # Depending on the input type this function does different things:
227
+ #
228
+ # - nil -> expression to take last column of a context.
229
+ # - String -> syntactic sugar for `Polars.col(..).last`
230
+ # - Series -> Take last value in `Series`
231
+ #
232
+ # @return [Object]
233
+ def last(column = nil)
234
+ if column.nil?
235
+ return Utils.wrap_expr(_last)
236
+ end
218
237
 
219
- # def head
220
- # end
238
+ if column.is_a?(Series)
239
+ if column.len > 0
240
+ return column[-1]
241
+ else
242
+ raise IndexError, "The series is empty, so no last value can be returned"
243
+ end
244
+ end
245
+ col(column).last
246
+ end
221
247
 
222
- # def tail
223
- # end
248
+ # Get the first `n` rows.
249
+ #
250
+ # @param column [Object]
251
+ # Column name or Series.
252
+ # @param n [Integer]
253
+ # Number of rows to return.
254
+ #
255
+ # @return [Object]
256
+ def head(column, n = 10)
257
+ if column.is_a?(Series)
258
+ column.head(n)
259
+ else
260
+ col(column).head(n)
261
+ end
262
+ end
263
+
264
+ # Get the last `n` rows.
265
+ #
266
+ # @param column [Object]
267
+ # Column name or Series.
268
+ # @param n [Integer]
269
+ # Number of rows to return.
270
+ #
271
+ # @return [Object]
272
+ def tail(column, n = 10)
273
+ if column.is_a?(Series)
274
+ column.tail(n)
275
+ else
276
+ col(column).tail(n)
277
+ end
278
+ end
224
279
 
225
280
  # Return an expression representing a literal value.
226
281
  #
@@ -242,14 +297,69 @@ module Polars
242
297
  # def cumsum
243
298
  # end
244
299
 
245
- # def spearman_rank_corr
246
- # end
300
+ # Compute the Spearman rank correlation between two columns.
301
+ #
302
+ # Missing data will be excluded from the computation.
303
+ #
304
+ # @param a [Object]
305
+ # Column name or Expression.
306
+ # @param b [Object]
307
+ # Column name or Expression.
308
+ # @param ddof [Integer]
309
+ # Delta degrees of freedom
310
+ # @param propagate_nans [Boolean]
311
+ # If `true`, any `NaN` encountered will lead to `NaN` in the output.
312
+ # Defaults to `false`, where `NaN` values are regarded as larger than any finite number
313
+ # and thus lead to the highest rank.
314
+ #
315
+ # @return [Expr]
316
+ def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
317
+ if a.is_a?(String)
318
+ a = col(a)
319
+ end
320
+ if b.is_a?(String)
321
+ b = col(b)
322
+ end
323
+ Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
324
+ end
247
325
 
248
- # def pearson_corr
249
- # end
326
+ # Compute the Pearson correlation between two columns.
327
+ #
328
+ # @param a [Object]
329
+ # Column name or Expression.
330
+ # @param b [Object]
331
+ # Column name or Expression.
332
+ # @param ddof [Integer]
333
+ # Delta degrees of freedom
334
+ #
335
+ # @return [Expr]
336
+ def pearson_corr(a, b, ddof: 1)
337
+ if a.is_a?(String)
338
+ a = col(a)
339
+ end
340
+ if b.is_a?(String)
341
+ b = col(b)
342
+ end
343
+ Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
344
+ end
250
345
 
251
- # def cov
252
- # end
346
+ # Compute the covariance between two columns/expressions.
347
+ #
348
+ # @param a [Object]
349
+ # Column name or Expression.
350
+ # @param b [Object]
351
+ # Column name or Expression.
352
+ #
353
+ # @return [Expr]
354
+ def cov(a, b)
355
+ if a.is_a?(String)
356
+ a = col(a)
357
+ end
358
+ if b.is_a?(String)
359
+ b = col(b)
360
+ end
361
+ Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
362
+ end
253
363
 
254
364
  # def map
255
365
  # end
@@ -408,7 +518,7 @@ module Polars
408
518
  # "bool" => [true, nil],
409
519
  # "list" => [[1, 2], [3]],
410
520
  # }
411
- # ).select([Polars.struct(Polars.all()).alias("my_struct")])
521
+ # ).select([Polars.struct(Polars.all).alias("my_struct")])
412
522
  # # =>
413
523
  # # shape: (2, 1)
414
524
  # # ┌─────────────────────┐
@@ -425,7 +535,7 @@ module Polars
425
535
  # df = Polars::DataFrame.new(
426
536
  # {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
427
537
  # )
428
- # df.with_column(pl.struct(pl.col(["a", "b"])).alias("a_and_b"))
538
+ # df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
429
539
  # # =>
430
540
  # # shape: (4, 4)
431
541
  # # ┌─────┬───────┬─────┬─────────────┐
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # Created by `df.lazy.groupby("foo")`.
2
3
  class LazyGroupBy
3
4
  # @private
4
5
  def initialize(lgb, lazyframe_class)