polars-df 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,19 @@ module Polars
3
3
  module ExprDispatch
4
4
  private
5
5
 
6
+ def self.included(base)
7
+ base.attr_accessor :_s
8
+ base.singleton_class.attr_accessor :_accessor
9
+ end
10
+
6
11
  def method_missing(method, ...)
7
12
  return super unless self.class.method_defined?(method)
8
13
 
14
+ namespace = self.class._accessor
15
+
9
16
  s = Utils.wrap_s(_s)
10
17
  expr = Utils.col(s.name)
18
+ expr = expr.send(namespace) if namespace
11
19
  s.to_frame.select(expr.send(method, ...)).to_series
12
20
  end
13
21
  end
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # Starts a new GroupBy operation.
2
3
  class GroupBy
3
4
  # @private
4
5
  attr_accessor :_df, :_dataframe_class, :by, :maintain_order
data/lib/polars/io.rb CHANGED
@@ -183,7 +183,7 @@ module Polars
183
183
  # @param has_header [Boolean]
184
184
  # Indicate if the first row of dataset is a header or not.
185
185
  # If set to false, column names will be autogenerated in the
186
- # following format: ``column_x``, with ``x`` being an
186
+ # following format: `column_x`, with `x` being an
187
187
  # enumeration over every column in the dataset starting at 1.
188
188
  # @param sep [String]
189
189
  # Single byte character to use as delimiter in the file.
@@ -558,7 +558,7 @@ module Polars
558
558
  # "ham" => ["a", "b", "c"]
559
559
  # }
560
560
  # ).lazy
561
- # lf.filter(Polars.col("foo") < 3).collect()
561
+ # lf.filter(Polars.col("foo") < 3).collect
562
562
  # # =>
563
563
  # # shape: (2, 3)
564
564
  # # ┌─────┬─────┬─────┐
@@ -910,7 +910,7 @@ module Polars
910
910
  # [
911
911
  # (Polars.col("a") ** 2).alias("a^2"),
912
912
  # (Polars.col("b") / 2).alias("b/2"),
913
- # (Polars.col("c").is_not()).alias("not c")
913
+ # (Polars.col("c").is_not).alias("not c")
914
914
  # ]
915
915
  # ).collect
916
916
  # # =>
@@ -1256,8 +1256,12 @@ module Polars
1256
1256
  select(Utils.col("*").take_every(n))
1257
1257
  end
1258
1258
 
1259
- # def fill_null
1260
- # end
1259
+ # Fill null values using the specified value or strategy.
1260
+ #
1261
+ # @return [LazyFrame]
1262
+ def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: nil)
1263
+ select(Polars.all.fill_null(value, strategy: strategy, limit: limit))
1264
+ end
1261
1265
 
1262
1266
  # Fill floating point NaN values.
1263
1267
  #
@@ -191,8 +191,16 @@ module Polars
191
191
  end
192
192
  end
193
193
 
194
- # def n_unique
195
- # end
194
+ # Count unique values.
195
+ #
196
+ # @return [Object]
197
+ def n_unique(column)
198
+ if column.is_a?(Series)
199
+ column.n_unique
200
+ else
201
+ col(column).n_unique
202
+ end
203
+ end
196
204
 
197
205
  # Get the first value.
198
206
  #
@@ -213,14 +221,61 @@ module Polars
213
221
  end
214
222
  end
215
223
 
216
- # def last
217
- # end
224
+ # Get the last value.
225
+ #
226
+ # Depending on the input type this function does different things:
227
+ #
228
+ # - nil -> expression to take last column of a context.
229
+ # - String -> syntactic sugar for `Polars.col(..).last`
230
+ # - Series -> Take last value in `Series`
231
+ #
232
+ # @return [Object]
233
+ def last(column = nil)
234
+ if column.nil?
235
+ return Utils.wrap_expr(_last)
236
+ end
218
237
 
219
- # def head
220
- # end
238
+ if column.is_a?(Series)
239
+ if column.len > 0
240
+ return column[-1]
241
+ else
242
+ raise IndexError, "The series is empty, so no last value can be returned"
243
+ end
244
+ end
245
+ col(column).last
246
+ end
221
247
 
222
- # def tail
223
- # end
248
+ # Get the first `n` rows.
249
+ #
250
+ # @param column [Object]
251
+ # Column name or Series.
252
+ # @param n [Integer]
253
+ # Number of rows to return.
254
+ #
255
+ # @return [Object]
256
+ def head(column, n = 10)
257
+ if column.is_a?(Series)
258
+ column.head(n)
259
+ else
260
+ col(column).head(n)
261
+ end
262
+ end
263
+
264
+ # Get the last `n` rows.
265
+ #
266
+ # @param column [Object]
267
+ # Column name or Series.
268
+ # @param n [Integer]
269
+ # Number of rows to return.
270
+ #
271
+ # @return [Object]
272
+ def tail(column, n = 10)
273
+ if column.is_a?(Series)
274
+ column.tail(n)
275
+ else
276
+ col(column).tail(n)
277
+ end
278
+ end
224
279
 
225
280
  # Return an expression representing a literal value.
226
281
  #
@@ -242,14 +297,69 @@ module Polars
242
297
  # def cumsum
243
298
  # end
244
299
 
245
- # def spearman_rank_corr
246
- # end
300
+ # Compute the Spearman rank correlation between two columns.
301
+ #
302
+ # Missing data will be excluded from the computation.
303
+ #
304
+ # @param a [Object]
305
+ # Column name or Expression.
306
+ # @param b [Object]
307
+ # Column name or Expression.
308
+ # @param ddof [Integer]
309
+ # Delta degrees of freedom
310
+ # @param propagate_nans [Boolean]
311
+ # If `true`, any `NaN` encountered will lead to `NaN` in the output.
312
+ # Defaults to `false`, where `NaN` values are regarded as larger than any finite number
313
+ # and thus lead to the highest rank.
314
+ #
315
+ # @return [Expr]
316
+ def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
317
+ if a.is_a?(String)
318
+ a = col(a)
319
+ end
320
+ if b.is_a?(String)
321
+ b = col(b)
322
+ end
323
+ Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
324
+ end
247
325
 
248
- # def pearson_corr
249
- # end
326
+ # Compute the Pearson correlation between two columns.
327
+ #
328
+ # @param a [Object]
329
+ # Column name or Expression.
330
+ # @param b [Object]
331
+ # Column name or Expression.
332
+ # @param ddof [Integer]
333
+ # Delta degrees of freedom
334
+ #
335
+ # @return [Expr]
336
+ def pearson_corr(a, b, ddof: 1)
337
+ if a.is_a?(String)
338
+ a = col(a)
339
+ end
340
+ if b.is_a?(String)
341
+ b = col(b)
342
+ end
343
+ Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
344
+ end
250
345
 
251
- # def cov
252
- # end
346
+ # Compute the covariance between two columns/expressions.
347
+ #
348
+ # @param a [Object]
349
+ # Column name or Expression.
350
+ # @param b [Object]
351
+ # Column name or Expression.
352
+ #
353
+ # @return [Expr]
354
+ def cov(a, b)
355
+ if a.is_a?(String)
356
+ a = col(a)
357
+ end
358
+ if b.is_a?(String)
359
+ b = col(b)
360
+ end
361
+ Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
362
+ end
253
363
 
254
364
  # def map
255
365
  # end
@@ -408,7 +518,7 @@ module Polars
408
518
  # "bool" => [true, nil],
409
519
  # "list" => [[1, 2], [3]],
410
520
  # }
411
- # ).select([Polars.struct(Polars.all()).alias("my_struct")])
521
+ # ).select([Polars.struct(Polars.all).alias("my_struct")])
412
522
  # # =>
413
523
  # # shape: (2, 1)
414
524
  # # ┌─────────────────────┐
@@ -425,7 +535,7 @@ module Polars
425
535
  # df = Polars::DataFrame.new(
426
536
  # {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
427
537
  # )
428
- # df.with_column(pl.struct(pl.col(["a", "b"])).alias("a_and_b"))
538
+ # df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
429
539
  # # =>
430
540
  # # shape: (4, 4)
431
541
  # # ┌─────┬───────┬─────┬─────────────┐
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # Created by `df.lazy.groupby("foo")`.
2
3
  class LazyGroupBy
3
4
  # @private
4
5
  def initialize(lgb, lazyframe_class)