polars-df 0.8.0-x86_64-linux → 0.9.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Cargo.lock +107 -59
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +1726 -754
  6. data/LICENSE.txt +1 -1
  7. data/README.md +2 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/3.3/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +449 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/cat_expr.rb +24 -0
  14. data/lib/polars/cat_name_space.rb +75 -0
  15. data/lib/polars/config.rb +2 -2
  16. data/lib/polars/data_frame.rb +179 -43
  17. data/lib/polars/data_types.rb +191 -28
  18. data/lib/polars/date_time_expr.rb +31 -14
  19. data/lib/polars/exceptions.rb +12 -1
  20. data/lib/polars/expr.rb +866 -186
  21. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  22. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  23. data/lib/polars/functions/as_datatype.rb +248 -0
  24. data/lib/polars/functions/col.rb +47 -0
  25. data/lib/polars/functions/eager.rb +182 -0
  26. data/lib/polars/functions/lazy.rb +1280 -0
  27. data/lib/polars/functions/len.rb +49 -0
  28. data/lib/polars/functions/lit.rb +35 -0
  29. data/lib/polars/functions/random.rb +16 -0
  30. data/lib/polars/functions/range/date_range.rb +103 -0
  31. data/lib/polars/functions/range/int_range.rb +51 -0
  32. data/lib/polars/functions/repeat.rb +144 -0
  33. data/lib/polars/functions/whenthen.rb +27 -0
  34. data/lib/polars/functions.rb +29 -416
  35. data/lib/polars/group_by.rb +2 -2
  36. data/lib/polars/io.rb +18 -25
  37. data/lib/polars/lazy_frame.rb +367 -53
  38. data/lib/polars/list_expr.rb +152 -6
  39. data/lib/polars/list_name_space.rb +102 -0
  40. data/lib/polars/meta_expr.rb +175 -7
  41. data/lib/polars/series.rb +273 -34
  42. data/lib/polars/string_cache.rb +75 -0
  43. data/lib/polars/string_expr.rb +412 -96
  44. data/lib/polars/string_name_space.rb +4 -4
  45. data/lib/polars/testing.rb +507 -0
  46. data/lib/polars/utils.rb +52 -8
  47. data/lib/polars/version.rb +1 -1
  48. data/lib/polars.rb +15 -2
  49. metadata +33 -4
  50. data/lib/polars/lazy_functions.rb +0 -1181
@@ -9,6 +9,60 @@ module Polars
9
9
  self._rbexpr = expr._rbexpr
10
10
  end
11
11
 
12
+ # Evaluate whether all boolean values in a list are true.
13
+ #
14
+ # @return [Expr]
15
+ #
16
+ # @example
17
+ # df = Polars::DataFrame.new(
18
+ # {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
19
+ # )
20
+ # df.with_columns(all: Polars.col("a").list.all)
21
+ # # =>
22
+ # # shape: (6, 2)
23
+ # # ┌────────────────┬───────┐
24
+ # # │ a ┆ all │
25
+ # # │ --- ┆ --- │
26
+ # # │ list[bool] ┆ bool │
27
+ # # ╞════════════════╪═══════╡
28
+ # # │ [true, true] ┆ true │
29
+ # # │ [false, true] ┆ false │
30
+ # # │ [false, false] ┆ false │
31
+ # # │ [null] ┆ true │
32
+ # # │ [] ┆ true │
33
+ # # │ null ┆ null │
34
+ # # └────────────────┴───────┘
35
+ def all
36
+ Utils.wrap_expr(_rbexpr.list_all)
37
+ end
38
+
39
+ # Evaluate whether any boolean value in a list is true.
40
+ #
41
+ # @return [Expr]
42
+ #
43
+ # @example
44
+ # df = Polars::DataFrame.new(
45
+ # {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
46
+ # )
47
+ # df.with_columns(any: Polars.col("a").list.any)
48
+ # # =>
49
+ # # shape: (6, 2)
50
+ # # ┌────────────────┬───────┐
51
+ # # │ a ┆ any │
52
+ # # │ --- ┆ --- │
53
+ # # │ list[bool] ┆ bool │
54
+ # # ╞════════════════╪═══════╡
55
+ # # │ [true, true] ┆ true │
56
+ # # │ [false, true] ┆ true │
57
+ # # │ [false, false] ┆ false │
58
+ # # │ [null] ┆ false │
59
+ # # │ [] ┆ false │
60
+ # # │ null ┆ null │
61
+ # # └────────────────┴───────┘
62
+ def any
63
+ Utils.wrap_expr(_rbexpr.list_any)
64
+ end
65
+
12
66
  # Get the length of the arrays as `:u32`.
13
67
  #
14
68
  # @return [Expr]
@@ -31,6 +85,80 @@ module Polars
31
85
  end
32
86
  alias_method :len, :lengths
33
87
 
88
+ # Drop all null values in the list.
89
+ #
90
+ # The original order of the remaining elements is preserved.
91
+ #
92
+ # @return [Expr]
93
+ #
94
+ # @example
95
+ # df = Polars::DataFrame.new({"values" => [[nil, 1, nil, 2], [nil], [3, 4]]})
96
+ # df.with_columns(drop_nulls: Polars.col("values").list.drop_nulls)
97
+ # # =>
98
+ # # shape: (3, 2)
99
+ # # ┌────────────────┬────────────┐
100
+ # # │ values ┆ drop_nulls │
101
+ # # │ --- ┆ --- │
102
+ # # │ list[i64] ┆ list[i64] │
103
+ # # ╞════════════════╪════════════╡
104
+ # # │ [null, 1, … 2] ┆ [1, 2] │
105
+ # # │ [null] ┆ [] │
106
+ # # │ [3, 4] ┆ [3, 4] │
107
+ # # └────────────────┴────────────┘
108
+ def drop_nulls
109
+ Utils.wrap_expr(_rbexpr.list_drop_nulls)
110
+ end
111
+
112
+ # Sample from this list.
113
+ #
114
+ # @param n [Integer, Expr]
115
+ # Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
116
+ # `fraction` is nil.
117
+ # @param fraction [Float]
118
+ # Fraction of items to return. Cannot be used with `n`.
119
+ # @param with_replacement [Boolean]
120
+ # Allow values to be sampled more than once.
121
+ # @param shuffle [Boolean]
122
+ # Shuffle the order of sampled data points.
123
+ # @param seed [Integer]
124
+ # Seed for the random number generator. If set to nil (default), a
125
+ # random seed is generated for each sample operation.
126
+ #
127
+ # @return [Expr]
128
+ #
129
+ # @example
130
+ # df = Polars::DataFrame.new({"values" => [[1, 2, 3], [4, 5]], "n" => [2, 1]})
131
+ # df.with_columns(sample: Polars.col("values").list.sample(n: Polars.col("n"), seed: 1))
132
+ # # =>
133
+ # # shape: (2, 3)
134
+ # # ┌───────────┬─────┬───────────┐
135
+ # # │ values ┆ n ┆ sample │
136
+ # # │ --- ┆ --- ┆ --- │
137
+ # # │ list[i64] ┆ i64 ┆ list[i64] │
138
+ # # ╞═══════════╪═════╪═══════════╡
139
+ # # │ [1, 2, 3] ┆ 2 ┆ [2, 1] │
140
+ # # │ [4, 5] ┆ 1 ┆ [5] │
141
+ # # └───────────┴─────┴───────────┘
142
+ def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
143
+ if !n.nil? && !fraction.nil?
144
+ msg = "cannot specify both `n` and `fraction`"
145
+ raise ArgumentError, msg
146
+ end
147
+
148
+ if !fraction.nil?
149
+ fraction = Utils.parse_as_expression(fraction)
150
+ return Utils.wrap_expr(
151
+ _rbexpr.list_sample_fraction(
152
+ fraction, with_replacement, shuffle, seed
153
+ )
154
+ )
155
+ end
156
+
157
+ n = 1 if n.nil?
158
+ n = Utils.parse_as_expression(n)
159
+ Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
160
+ end
161
+
34
162
  # Sum all the lists in the array.
35
163
  #
36
164
  # @return [Expr]
@@ -280,13 +408,29 @@ module Polars
280
408
  # Note that defaulting to raising an error is much cheaper
281
409
  #
282
410
  # @return [Expr]
283
- def take(index, null_on_oob: false)
411
+ #
412
+ # @example
413
+ # df = Polars::DataFrame.new({"a" => [[3, 2, 1], [], [1, 2, 3, 4, 5]]})
414
+ # df.with_columns(gather: Polars.col("a").list.gather([0, 4], null_on_oob: true))
415
+ # # =>
416
+ # # shape: (3, 2)
417
+ # # ┌─────────────┬──────────────┐
418
+ # # │ a ┆ gather │
419
+ # # │ --- ┆ --- │
420
+ # # │ list[i64] ┆ list[i64] │
421
+ # # ╞═════════════╪══════════════╡
422
+ # # │ [3, 2, 1] ┆ [3, null] │
423
+ # # │ [] ┆ [null, null] │
424
+ # # │ [1, 2, … 5] ┆ [1, 5] │
425
+ # # └─────────────┴──────────────┘
426
+ def gather(index, null_on_oob: false)
284
427
  if index.is_a?(::Array)
285
428
  index = Series.new(index)
286
429
  end
287
430
  index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
288
- Utils.wrap_expr(_rbexpr.list_take(index, null_on_oob))
431
+ Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
289
432
  end
433
+ alias_method :take, :gather
290
434
 
291
435
  # Get the first value of the sublists.
292
436
  #
@@ -363,6 +507,8 @@ module Polars
363
507
  #
364
508
  # @param separator [String]
365
509
  # string to separate the items with
510
+ # @param ignore_nulls [Boolean]
511
+ # Ignore null values (default).
366
512
  #
367
513
  # @return [Expr]
368
514
  #
@@ -379,9 +525,9 @@ module Polars
379
525
  # # │ a b c │
380
526
  # # │ x y │
381
527
  # # └───────┘
382
- def join(separator)
528
+ def join(separator, ignore_nulls: true)
383
529
  separator = Utils.parse_as_expression(separator, str_as_lit: true)
384
- Utils.wrap_expr(_rbexpr.list_join(separator))
530
+ Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
385
531
  end
386
532
 
387
533
  # Retrieve the index of the minimal value in every sublist.
@@ -544,8 +690,8 @@ module Polars
544
690
  # # [2, 1]
545
691
  # # ]
546
692
  def tail(n = 5)
547
- offset = -Utils.expr_to_lit_or_expr(n, str_to_lit: false)
548
- slice(offset, n)
693
+ n = Utils.parse_as_expression(n)
694
+ Utils.wrap_expr(_rbexpr.list_tail(n))
549
695
  end
550
696
 
551
697
  # Count how often the value produced by ``element`` occurs.
@@ -10,6 +10,56 @@ module Polars
10
10
  self._s = series._s
11
11
  end
12
12
 
13
+ # Evaluate whether all boolean values in a list are true.
14
+ #
15
+ # @return [Series]
16
+ #
17
+ # @example
18
+ # s = Polars::Series.new(
19
+ # [[true, true], [false, true], [false, false], [nil], [], nil],
20
+ # dtype: Polars::List.new(Polars::Boolean)
21
+ # )
22
+ # s.list.all
23
+ # # =>
24
+ # # shape: (6,)
25
+ # # Series: '' [bool]
26
+ # # [
27
+ # # true
28
+ # # false
29
+ # # false
30
+ # # true
31
+ # # true
32
+ # # null
33
+ # # ]
34
+ def all
35
+ super
36
+ end
37
+
38
+ # Evaluate whether any boolean value in a list is true.
39
+ #
40
+ # @return [Series]
41
+ #
42
+ # @example
43
+ # s = Polars::Series.new(
44
+ # [[true, true], [false, true], [false, false], [nil], [], nil],
45
+ # dtype: Polars::List.new(Polars::Boolean)
46
+ # )
47
+ # s.list.any
48
+ # # =>
49
+ # # shape: (6,)
50
+ # # Series: '' [bool]
51
+ # # [
52
+ # # true
53
+ # # true
54
+ # # false
55
+ # # false
56
+ # # false
57
+ # # null
58
+ # # ]
59
+ def any
60
+ super
61
+ end
62
+
13
63
  # Get the length of the arrays as UInt32.
14
64
  #
15
65
  # @return [Series]
@@ -28,6 +78,58 @@ module Polars
28
78
  super
29
79
  end
30
80
 
81
+ # Drop all null values in the list.
82
+ #
83
+ # The original order of the remaining elements is preserved.
84
+ #
85
+ # @return [Series]
86
+ #
87
+ # @example
88
+ # s = Polars::Series.new("values", [[nil, 1, nil, 2], [nil], [3, 4]])
89
+ # s.list.drop_nulls
90
+ # # =>
91
+ # # shape: (3,)
92
+ # # Series: 'values' [list[i64]]
93
+ # # [
94
+ # # [1, 2]
95
+ # # []
96
+ # # [3, 4]
97
+ # # ]
98
+ def drop_nulls
99
+ super
100
+ end
101
+
102
+ # Sample from this list.
103
+ #
104
+ # @param n [Integer, Series]
105
+ # Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
106
+ # `fraction` is nil.
107
+ # @param fraction [Float]
108
+ # Fraction of items to return. Cannot be used with `n`.
109
+ # @param with_replacement [Boolean]
110
+ # Allow values to be sampled more than once.
111
+ # @param shuffle [Boolean]
112
+ # Shuffle the order of sampled data points.
113
+ # @param seed [Integer]
114
+ # Seed for the random number generator. If set to nil (default), a
115
+ # random seed is generated for each sample operation.
116
+ #
117
+ # @return [Series]
118
+ #
119
+ # @example
120
+ # s = Polars::Series.new("values", [[1, 2, 3], [4, 5]])
121
+ # s.list.sample(n: Polars::Series.new("n", [2, 1]), seed: 1)
122
+ # # =>
123
+ # # shape: (2,)
124
+ # # Series: 'values' [list[i64]]
125
+ # # [
126
+ # # [2, 1]
127
+ # # [5]
128
+ # # ]
129
+ def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
130
+ super
131
+ end
132
+
31
133
  # Sum all the arrays in the list.
32
134
  #
33
135
  # @return [Series]
@@ -23,9 +23,114 @@ module Polars
23
23
  !(self == other)
24
24
  end
25
25
 
26
+ # Indicate if this expression is the same as another expression.
27
+ #
28
+ # @return [Boolean]
29
+ #
30
+ # @example
31
+ # foo_bar = Polars.col("foo").alias("bar")
32
+ # foo = Polars.col("foo")
33
+ # foo_bar.meta.eq(foo)
34
+ # # => false
35
+ # foo_bar2 = Polars.col("foo").alias("bar")
36
+ # foo_bar.meta.eq(foo_bar2)
37
+ # # => true
38
+ def eq(other)
39
+ _rbexpr.meta_eq(other._rbexpr)
40
+ end
41
+
42
+ # Indicate if this expression is NOT the same as another expression.
43
+ #
44
+ # @return [Boolean]
45
+ #
46
+ # @example
47
+ # foo_bar = Polars.col("foo").alias("bar")
48
+ # foo = Polars.col("foo")
49
+ # foo_bar.meta.ne(foo)
50
+ # # => true
51
+ # foo_bar2 = Polars.col("foo").alias("bar")
52
+ # foo_bar.meta.ne(foo_bar2)
53
+ # # => false
54
+ def ne(other)
55
+ !eq(other)
56
+ end
57
+
58
+ # Indicate if this expression expands into multiple expressions.
59
+ #
60
+ # @return [Boolean]
61
+ #
62
+ # @example
63
+ # e = Polars.col(["a", "b"]).alias("bar")
64
+ # e.meta.has_multiple_outputs
65
+ # # => true
66
+ def has_multiple_outputs
67
+ _rbexpr.meta_has_multiple_outputs
68
+ end
69
+
70
+ # Indicate if this expression is a basic (non-regex) unaliased column.
71
+ #
72
+ # @return [Boolean]
73
+ #
74
+ # @example
75
+ # e = Polars.col("foo")
76
+ # e.meta.is_column
77
+ # # => true
78
+ # e = Polars.col("foo") * Polars.col("bar")
79
+ # e.meta.is_column
80
+ # # => false
81
+ # e = Polars.col('^col.*\d+$')
82
+ # e.meta.is_column
83
+ # # => false
84
+ def is_column
85
+ _rbexpr.meta_is_column
86
+ end
87
+
88
+ # Indicate if this expression expands to columns that match a regex pattern.
89
+ #
90
+ # @return [Boolean]
91
+ #
92
+ # @example
93
+ # e = Polars.col("^.*$").alias("bar")
94
+ # e.meta.is_regex_projection
95
+ # # => true
96
+ def is_regex_projection
97
+ _rbexpr.meta_is_regex_projection
98
+ end
99
+
100
+ # Get the column name that this expression would produce.
101
+ #
102
+ # @return [String]
103
+ #
104
+ # @example
105
+ # e = Polars.col("foo") * Polars.col("bar")
106
+ # e.meta.output_name
107
+ # # => "foo"
108
+ # e_filter = Polars.col("foo").filter(Polars.col("bar") == 13)
109
+ # e_filter.meta.output_name
110
+ # # => "foo"
111
+ # e_sum_over = Polars.sum("foo").over("groups")
112
+ # e_sum_over.meta.output_name
113
+ # # => "foo"
114
+ # e_sum_slice = Polars.sum("foo").slice(Polars.len - 10, Polars.col("bar"))
115
+ # e_sum_slice.meta.output_name
116
+ # # => "foo"
117
+ # Polars.len.meta.output_name
118
+ # # => "len"
119
+ def output_name
120
+ _rbexpr.meta_output_name
121
+ end
122
+
26
123
  # Pop the latest expression and return the input(s) of the popped expression.
27
124
  #
28
125
  # @return [Array]
126
+ #
127
+ # @example
128
+ # e = Polars.col("foo").alias("bar")
129
+ # first = e.meta.pop[0]
130
+ # _ = first.meta == Polars.col("foo")
131
+ # # => true
132
+ # _ = first.meta == Polars.col("bar")
133
+ # # => false
29
134
  def pop
30
135
  _rbexpr.meta_pop.map { |e| Utils.wrap_expr(e) }
31
136
  end
@@ -33,22 +138,85 @@ module Polars
33
138
  # Get a list with the root column name.
34
139
  #
35
140
  # @return [Array]
141
+ #
142
+ # @example
143
+ # e = Polars.col("foo") * Polars.col("bar")
144
+ # e.meta.root_names
145
+ # # => ["foo", "bar"]
146
+ # e_filter = Polars.col("foo").filter(Polars.col("bar") == 13)
147
+ # e_filter.meta.root_names
148
+ # # => ["foo", "bar"]
149
+ # e_sum_over = Polars.sum("foo").over("groups")
150
+ # e_sum_over.meta.root_names
151
+ # # => ["foo", "groups"]
152
+ # e_sum_slice = Polars.sum("foo").slice(Polars.len - 10, Polars.col("bar"))
153
+ # e_sum_slice.meta.root_names
154
+ # # => ["foo", "bar"]
36
155
  def root_names
37
156
  _rbexpr.meta_roots
38
157
  end
39
158
 
40
- # Get the column name that this expression would produce.
41
- #
42
- # @return [String]
43
- def output_name
44
- _rbexpr.meta_output_name
45
- end
46
-
47
159
  # Undo any renaming operation like `alias` or `keep_name`.
48
160
  #
49
161
  # @return [Expr]
162
+ #
163
+ # @example
164
+ # e = Polars.col("foo").alias("bar")
165
+ # _ = e.meta.undo_aliases.meta == Polars.col("foo")
166
+ # # => true
167
+ # e = Polars.col("foo").sum.over("bar")
168
+ # _ = e.name.keep.meta.undo_aliases.meta == e
169
+ # # => true
50
170
  def undo_aliases
51
171
  Utils.wrap_expr(_rbexpr.meta_undo_aliases)
52
172
  end
173
+
174
+ # Turn this expression into a selector.
175
+ #
176
+ # @return [Expr]
177
+ def _as_selector
178
+ Utils.wrap_expr(_rbexpr._meta_as_selector)
179
+ end
180
+
181
+ # Add selectors.
182
+ #
183
+ # @return [Expr]
184
+ def _selector_add(other)
185
+ Utils.wrap_expr(_rbexpr._meta_selector_add(other._rbexpr))
186
+ end
187
+
188
+ # Subtract selectors.
189
+ #
190
+ # @return [Expr]
191
+ def _selector_sub(other)
192
+ Utils.wrap_expr(_rbexpr._meta_selector_sub(other._rbexpr))
193
+ end
194
+
195
+ # Combine selectors with a logical AND (`&`).
196
+ #
197
+ # @return [Expr]
198
+ def _selector_and(other)
199
+ Utils.wrap_expr(_rbexpr._meta_selector_and(other._rbexpr))
200
+ end
201
+
202
+ # Format the expression as a tree.
203
+ #
204
+ # @param return_as_string [Boolean]
205
+ # If true, return as string rather than printing to stdout.
206
+ #
207
+ # @return [String, nil]
208
+ #
209
+ # @example
210
+ # e = (Polars.col("foo") * Polars.col("bar")).sum.over(Polars.col("ham")) / 2
211
+ # e.meta.tree_format(return_as_string: true)
212
+ def tree_format(return_as_string: false)
213
+ s = _rbexpr.meta_tree_format
214
+ if return_as_string
215
+ s
216
+ else
217
+ puts s
218
+ nil
219
+ end
220
+ end
53
221
  end
54
222
  end