polars-df 0.5.0-x86_64-linux → 0.7.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -20,12 +20,12 @@ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py
20
20
  Polars.read_csv("iris.csv")
21
21
  .lazy
22
22
  .filter(Polars.col("sepal_length") > 5)
23
- .groupby("species")
23
+ .group_by("species")
24
24
  .agg(Polars.all.sum)
25
25
  .collect
26
26
  ```
27
27
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
29
 
30
30
  ## Reference
31
31
 
@@ -260,19 +260,19 @@ df["a"].var
260
260
  Group
261
261
 
262
262
  ```ruby
263
- df.groupby("a").count
263
+ df.group_by("a").count
264
264
  ```
265
265
 
266
266
  Works with all summary statistics
267
267
 
268
268
  ```ruby
269
- df.groupby("a").max
269
+ df.group_by("a").max
270
270
  ```
271
271
 
272
272
  Multiple groups
273
273
 
274
274
  ```ruby
275
- df.groupby(["a", "b"]).count
275
+ df.group_by(["a", "b"]).count
276
276
  ```
277
277
 
278
278
  ## Combining Data Frames
@@ -348,7 +348,7 @@ df.to_numo
348
348
  You can specify column types when creating a data frame
349
349
 
350
350
  ```ruby
351
- Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
351
+ Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
352
352
  ```
353
353
 
354
354
  Supported types are:
@@ -357,8 +357,10 @@ Supported types are:
357
357
  - float - `Float64`, `Float32`
358
358
  - integer - `Int64`, `Int32`, `Int16`, `Int8`
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
- - string - `Utf8`, `Categorical`
360
+ - string - `Utf8`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
+ - nested - `List`, `Struct`, `Array`
363
+ - other - `Object`, `Null`
362
364
 
363
365
  Get column types
364
366
 
@@ -401,13 +403,13 @@ df.plot("a", "b", type: "pie")
401
403
  Group data
402
404
 
403
405
  ```ruby
404
- df.groupby("c").plot("a", "b")
406
+ df.group_by("c").plot("a", "b")
405
407
  ```
406
408
 
407
409
  Stacked columns or bars
408
410
 
409
411
  ```ruby
410
- df.groupby("c").plot("a", "b", stacked: true)
412
+ df.group_by("c").plot("a", "b", stacked: true)
411
413
  ```
412
414
 
413
415
  ## History
Binary file
Binary file
Binary file
@@ -0,0 +1,84 @@
1
+ module Polars
2
+ # Namespace for array related expressions.
3
+ class ArrayExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Compute the min values of the sub-arrays.
13
+ #
14
+ # @return [Expr]
15
+ #
16
+ # @example
17
+ # df = Polars::DataFrame.new(
18
+ # {"a" => [[1, 2], [4, 3]]},
19
+ # schema: {"a" => Polars::Array.new(2, Polars::Int64)}
20
+ # )
21
+ # df.select(Polars.col("a").arr.min)
22
+ # # =>
23
+ # # shape: (2, 1)
24
+ # # ┌─────┐
25
+ # # │ a │
26
+ # # │ --- │
27
+ # # │ i64 │
28
+ # # ╞═════╡
29
+ # # │ 1 │
30
+ # # │ 3 │
31
+ # # └─────┘
32
+ def min
33
+ Utils.wrap_expr(_rbexpr.array_min)
34
+ end
35
+
36
+ # Compute the max values of the sub-arrays.
37
+ #
38
+ # @return [Expr]
39
+ #
40
+ # @example
41
+ # df = Polars::DataFrame.new(
42
+ # {"a" => [[1, 2], [4, 3]]},
43
+ # schema: {"a" => Polars::Array.new(2, Polars::Int64)}
44
+ # )
45
+ # df.select(Polars.col("a").arr.max)
46
+ # # =>
47
+ # # shape: (2, 1)
48
+ # # ┌─────┐
49
+ # # │ a │
50
+ # # │ --- │
51
+ # # │ i64 │
52
+ # # ╞═════╡
53
+ # # │ 2 │
54
+ # # │ 4 │
55
+ # # └─────┘
56
+ def max
57
+ Utils.wrap_expr(_rbexpr.array_max)
58
+ end
59
+
60
+ # Compute the sum values of the sub-arrays.
61
+ #
62
+ # @return [Expr]
63
+ #
64
+ # @example
65
+ # df = Polars::DataFrame.new(
66
+ # {"a" => [[1, 2], [4, 3]]},
67
+ # schema: {"a" => Polars::Array.new(2, Polars::Int64)}
68
+ # )
69
+ # df.select(Polars.col("a").arr.sum)
70
+ # # =>
71
+ # # shape: (2, 1)
72
+ # # ┌─────┐
73
+ # # │ a │
74
+ # # │ --- │
75
+ # # │ i64 │
76
+ # # ╞═════╡
77
+ # # │ 3 │
78
+ # # │ 7 │
79
+ # # └─────┘
80
+ def sum
81
+ Utils.wrap_expr(_rbexpr.array_sum)
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,77 @@
1
+ module Polars
2
+ # Series.arr namespace.
3
+ class ArrayNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "arr"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Compute the min values of the sub-arrays.
14
+ #
15
+ # @return [Series]
16
+ #
17
+ # @example
18
+ # s = Polars::Series.new(
19
+ # "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
20
+ # )
21
+ # s.arr.min
22
+ # # =>
23
+ # # shape: (2,)
24
+ # # Series: 'a' [i64]
25
+ # # [
26
+ # # 1
27
+ # # 3
28
+ # # ]
29
+ def min
30
+ super
31
+ end
32
+
33
+ # Compute the max values of the sub-arrays.
34
+ #
35
+ # @return [Series]
36
+ #
37
+ # @example
38
+ # s = Polars::Series.new(
39
+ # "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
40
+ # )
41
+ # s.arr.max
42
+ # # =>
43
+ # # shape: (2,)
44
+ # # Series: 'a' [i64]
45
+ # # [
46
+ # # 2
47
+ # # 4
48
+ # # ]
49
+ def max
50
+ super
51
+ end
52
+
53
+ # Compute the sum values of the sub-arrays.
54
+ #
55
+ # @return [Series]
56
+ #
57
+ # @example
58
+ # df = Polars::DataFrame.new(
59
+ # {"a" => [[1, 2], [4, 3]]},
60
+ # schema: {"a" => Polars::Array.new(2, Polars::Int64)}
61
+ # )
62
+ # df.select(Polars.col("a").arr.sum)
63
+ # # =>
64
+ # # shape: (2, 1)
65
+ # # ┌─────┐
66
+ # # │ a │
67
+ # # │ --- │
68
+ # # │ i64 │
69
+ # # ╞═════╡
70
+ # # │ 3 │
71
+ # # │ 7 │
72
+ # # └─────┘
73
+ def sum
74
+ super
75
+ end
76
+ end
77
+ end
@@ -41,7 +41,7 @@ module Polars
41
41
  dtypes.each do|k, v|
42
42
  dtype_list << [k, Utils.rb_type_to_dtype(v)]
43
43
  end
44
- elsif dtypes.is_a?(Array)
44
+ elsif dtypes.is_a?(::Array)
45
45
  dtype_slice = dtypes
46
46
  else
47
47
  raise ArgumentError, "dtype arg should be list or dict"