polars-df 0.8.0-arm64-darwin → 0.10.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -1
  3. data/Cargo.lock +159 -66
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +3112 -1613
  6. data/LICENSE.txt +1 -1
  7. data/README.md +3 -2
  8. data/lib/polars/3.1/polars.bundle +0 -0
  9. data/lib/polars/3.2/polars.bundle +0 -0
  10. data/lib/polars/3.3/polars.bundle +0 -0
  11. data/lib/polars/array_expr.rb +453 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/batched_csv_reader.rb +4 -2
  14. data/lib/polars/cat_expr.rb +24 -0
  15. data/lib/polars/cat_name_space.rb +75 -0
  16. data/lib/polars/config.rb +2 -2
  17. data/lib/polars/data_frame.rb +306 -96
  18. data/lib/polars/data_types.rb +191 -28
  19. data/lib/polars/date_time_expr.rb +41 -18
  20. data/lib/polars/date_time_name_space.rb +9 -3
  21. data/lib/polars/exceptions.rb +12 -1
  22. data/lib/polars/expr.rb +898 -215
  23. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  24. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  25. data/lib/polars/functions/as_datatype.rb +248 -0
  26. data/lib/polars/functions/col.rb +47 -0
  27. data/lib/polars/functions/eager.rb +182 -0
  28. data/lib/polars/functions/lazy.rb +1280 -0
  29. data/lib/polars/functions/len.rb +49 -0
  30. data/lib/polars/functions/lit.rb +35 -0
  31. data/lib/polars/functions/random.rb +16 -0
  32. data/lib/polars/functions/range/date_range.rb +103 -0
  33. data/lib/polars/functions/range/int_range.rb +51 -0
  34. data/lib/polars/functions/repeat.rb +144 -0
  35. data/lib/polars/functions/whenthen.rb +96 -0
  36. data/lib/polars/functions.rb +29 -416
  37. data/lib/polars/group_by.rb +2 -2
  38. data/lib/polars/io.rb +36 -31
  39. data/lib/polars/lazy_frame.rb +405 -88
  40. data/lib/polars/list_expr.rb +158 -8
  41. data/lib/polars/list_name_space.rb +102 -0
  42. data/lib/polars/meta_expr.rb +175 -7
  43. data/lib/polars/series.rb +282 -41
  44. data/lib/polars/string_cache.rb +75 -0
  45. data/lib/polars/string_expr.rb +413 -96
  46. data/lib/polars/string_name_space.rb +4 -4
  47. data/lib/polars/testing.rb +507 -0
  48. data/lib/polars/utils.rb +106 -8
  49. data/lib/polars/version.rb +1 -1
  50. data/lib/polars/whenthen.rb +83 -0
  51. data/lib/polars.rb +16 -4
  52. metadata +34 -6
  53. data/lib/polars/lazy_functions.rb +0 -1181
  54. data/lib/polars/when.rb +0 -16
  55. data/lib/polars/when_then.rb +0 -19
@@ -9,6 +9,60 @@ module Polars
9
9
  self._rbexpr = expr._rbexpr
10
10
  end
11
11
 
12
+ # Evaluate whether all boolean values in a list are true.
13
+ #
14
+ # @return [Expr]
15
+ #
16
+ # @example
17
+ # df = Polars::DataFrame.new(
18
+ # {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
19
+ # )
20
+ # df.with_columns(all: Polars.col("a").list.all)
21
+ # # =>
22
+ # # shape: (6, 2)
23
+ # # ┌────────────────┬───────┐
24
+ # # │ a ┆ all │
25
+ # # │ --- ┆ --- │
26
+ # # │ list[bool] ┆ bool │
27
+ # # ╞════════════════╪═══════╡
28
+ # # │ [true, true] ┆ true │
29
+ # # │ [false, true] ┆ false │
30
+ # # │ [false, false] ┆ false │
31
+ # # │ [null] ┆ true │
32
+ # # │ [] ┆ true │
33
+ # # │ null ┆ null │
34
+ # # └────────────────┴───────┘
35
+ def all
36
+ Utils.wrap_expr(_rbexpr.list_all)
37
+ end
38
+
39
+ # Evaluate whether any boolean value in a list is true.
40
+ #
41
+ # @return [Expr]
42
+ #
43
+ # @example
44
+ # df = Polars::DataFrame.new(
45
+ # {"a" => [[true, true], [false, true], [false, false], [nil], [], nil]}
46
+ # )
47
+ # df.with_columns(any: Polars.col("a").list.any)
48
+ # # =>
49
+ # # shape: (6, 2)
50
+ # # ┌────────────────┬───────┐
51
+ # # │ a ┆ any │
52
+ # # │ --- ┆ --- │
53
+ # # │ list[bool] ┆ bool │
54
+ # # ╞════════════════╪═══════╡
55
+ # # │ [true, true] ┆ true │
56
+ # # │ [false, true] ┆ true │
57
+ # # │ [false, false] ┆ false │
58
+ # # │ [null] ┆ false │
59
+ # # │ [] ┆ false │
60
+ # # │ null ┆ null │
61
+ # # └────────────────┴───────┘
62
+ def any
63
+ Utils.wrap_expr(_rbexpr.list_any)
64
+ end
65
+
12
66
  # Get the length of the arrays as `:u32`.
13
67
  #
14
68
  # @return [Expr]
@@ -31,6 +85,80 @@ module Polars
31
85
  end
32
86
  alias_method :len, :lengths
33
87
 
88
+ # Drop all null values in the list.
89
+ #
90
+ # The original order of the remaining elements is preserved.
91
+ #
92
+ # @return [Expr]
93
+ #
94
+ # @example
95
+ # df = Polars::DataFrame.new({"values" => [[nil, 1, nil, 2], [nil], [3, 4]]})
96
+ # df.with_columns(drop_nulls: Polars.col("values").list.drop_nulls)
97
+ # # =>
98
+ # # shape: (3, 2)
99
+ # # ┌────────────────┬────────────┐
100
+ # # │ values ┆ drop_nulls │
101
+ # # │ --- ┆ --- │
102
+ # # │ list[i64] ┆ list[i64] │
103
+ # # ╞════════════════╪════════════╡
104
+ # # │ [null, 1, … 2] ┆ [1, 2] │
105
+ # # │ [null] ┆ [] │
106
+ # # │ [3, 4] ┆ [3, 4] │
107
+ # # └────────────────┴────────────┘
108
+ def drop_nulls
109
+ Utils.wrap_expr(_rbexpr.list_drop_nulls)
110
+ end
111
+
112
+ # Sample from this list.
113
+ #
114
+ # @param n [Integer]
115
+ # Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
116
+ # `fraction` is nil.
117
+ # @param fraction [Float]
118
+ # Fraction of items to return. Cannot be used with `n`.
119
+ # @param with_replacement [Boolean]
120
+ # Allow values to be sampled more than once.
121
+ # @param shuffle [Boolean]
122
+ # Shuffle the order of sampled data points.
123
+ # @param seed [Integer]
124
+ # Seed for the random number generator. If set to nil (default), a
125
+ # random seed is generated for each sample operation.
126
+ #
127
+ # @return [Expr]
128
+ #
129
+ # @example
130
+ # df = Polars::DataFrame.new({"values" => [[1, 2, 3], [4, 5]], "n" => [2, 1]})
131
+ # df.with_columns(sample: Polars.col("values").list.sample(n: Polars.col("n"), seed: 1))
132
+ # # =>
133
+ # # shape: (2, 3)
134
+ # # ┌───────────┬─────┬───────────┐
135
+ # # │ values ┆ n ┆ sample │
136
+ # # │ --- ┆ --- ┆ --- │
137
+ # # │ list[i64] ┆ i64 ┆ list[i64] │
138
+ # # ╞═══════════╪═════╪═══════════╡
139
+ # # │ [1, 2, 3] ┆ 2 ┆ [2, 1] │
140
+ # # │ [4, 5] ┆ 1 ┆ [5] │
141
+ # # └───────────┴─────┴───────────┘
142
+ def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
143
+ if !n.nil? && !fraction.nil?
144
+ msg = "cannot specify both `n` and `fraction`"
145
+ raise ArgumentError, msg
146
+ end
147
+
148
+ if !fraction.nil?
149
+ fraction = Utils.parse_as_expression(fraction)
150
+ return Utils.wrap_expr(
151
+ _rbexpr.list_sample_fraction(
152
+ fraction, with_replacement, shuffle, seed
153
+ )
154
+ )
155
+ end
156
+
157
+ n = 1 if n.nil?
158
+ n = Utils.parse_as_expression(n)
159
+ Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
160
+ end
161
+
34
162
  # Sum all the lists in the array.
35
163
  #
36
164
  # @return [Expr]
@@ -237,6 +365,10 @@ module Polars
237
365
  #
238
366
  # @param index [Integer]
239
367
  # Index to return per sublist
368
+ # @param null_on_oob [Boolean]
369
+ # Behavior if an index is out of bounds:
370
+ # true -> set as null
371
+ # false -> raise an error
240
372
  #
241
373
  # @return [Expr]
242
374
  #
@@ -254,9 +386,9 @@ module Polars
254
386
  # # │ null │
255
387
  # # │ 1 │
256
388
  # # └──────┘
257
- def get(index)
389
+ def get(index, null_on_oob: true)
258
390
  index = Utils.parse_as_expression(index)
259
- Utils.wrap_expr(_rbexpr.list_get(index))
391
+ Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
260
392
  end
261
393
 
262
394
  # Get the value by index in the sublists.
@@ -280,13 +412,29 @@ module Polars
280
412
  # Note that defaulting to raising an error is much cheaper
281
413
  #
282
414
  # @return [Expr]
283
- def take(index, null_on_oob: false)
415
+ #
416
+ # @example
417
+ # df = Polars::DataFrame.new({"a" => [[3, 2, 1], [], [1, 2, 3, 4, 5]]})
418
+ # df.with_columns(gather: Polars.col("a").list.gather([0, 4], null_on_oob: true))
419
+ # # =>
420
+ # # shape: (3, 2)
421
+ # # ┌─────────────┬──────────────┐
422
+ # # │ a ┆ gather │
423
+ # # │ --- ┆ --- │
424
+ # # │ list[i64] ┆ list[i64] │
425
+ # # ╞═════════════╪══════════════╡
426
+ # # │ [3, 2, 1] ┆ [3, null] │
427
+ # # │ [] ┆ [null, null] │
428
+ # # │ [1, 2, … 5] ┆ [1, 5] │
429
+ # # └─────────────┴──────────────┘
430
+ def gather(index, null_on_oob: false)
284
431
  if index.is_a?(::Array)
285
432
  index = Series.new(index)
286
433
  end
287
434
  index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
288
- Utils.wrap_expr(_rbexpr.list_take(index, null_on_oob))
435
+ Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
289
436
  end
437
+ alias_method :take, :gather
290
438
 
291
439
  # Get the first value of the sublists.
292
440
  #
@@ -363,6 +511,8 @@ module Polars
363
511
  #
364
512
  # @param separator [String]
365
513
  # string to separate the items with
514
+ # @param ignore_nulls [Boolean]
515
+ # Ignore null values (default).
366
516
  #
367
517
  # @return [Expr]
368
518
  #
@@ -379,9 +529,9 @@ module Polars
379
529
  # # │ a b c │
380
530
  # # │ x y │
381
531
  # # └───────┘
382
- def join(separator)
532
+ def join(separator, ignore_nulls: true)
383
533
  separator = Utils.parse_as_expression(separator, str_as_lit: true)
384
- Utils.wrap_expr(_rbexpr.list_join(separator))
534
+ Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
385
535
  end
386
536
 
387
537
  # Retrieve the index of the minimal value in every sublist.
@@ -544,8 +694,8 @@ module Polars
544
694
  # # [2, 1]
545
695
  # # ]
546
696
  def tail(n = 5)
547
- offset = -Utils.expr_to_lit_or_expr(n, str_to_lit: false)
548
- slice(offset, n)
697
+ n = Utils.parse_as_expression(n)
698
+ Utils.wrap_expr(_rbexpr.list_tail(n))
549
699
  end
550
700
 
551
701
  # Count how often the value produced by ``element`` occurs.
@@ -10,6 +10,56 @@ module Polars
10
10
  self._s = series._s
11
11
  end
12
12
 
13
+ # Evaluate whether all boolean values in a list are true.
14
+ #
15
+ # @return [Series]
16
+ #
17
+ # @example
18
+ # s = Polars::Series.new(
19
+ # [[true, true], [false, true], [false, false], [nil], [], nil],
20
+ # dtype: Polars::List.new(Polars::Boolean)
21
+ # )
22
+ # s.list.all
23
+ # # =>
24
+ # # shape: (6,)
25
+ # # Series: '' [bool]
26
+ # # [
27
+ # # true
28
+ # # false
29
+ # # false
30
+ # # true
31
+ # # true
32
+ # # null
33
+ # # ]
34
+ def all
35
+ super
36
+ end
37
+
38
+ # Evaluate whether any boolean value in a list is true.
39
+ #
40
+ # @return [Series]
41
+ #
42
+ # @example
43
+ # s = Polars::Series.new(
44
+ # [[true, true], [false, true], [false, false], [nil], [], nil],
45
+ # dtype: Polars::List.new(Polars::Boolean)
46
+ # )
47
+ # s.list.any
48
+ # # =>
49
+ # # shape: (6,)
50
+ # # Series: '' [bool]
51
+ # # [
52
+ # # true
53
+ # # true
54
+ # # false
55
+ # # false
56
+ # # false
57
+ # # null
58
+ # # ]
59
+ def any
60
+ super
61
+ end
62
+
13
63
  # Get the length of the arrays as UInt32.
14
64
  #
15
65
  # @return [Series]
@@ -28,6 +78,58 @@ module Polars
28
78
  super
29
79
  end
30
80
 
81
+ # Drop all null values in the list.
82
+ #
83
+ # The original order of the remaining elements is preserved.
84
+ #
85
+ # @return [Series]
86
+ #
87
+ # @example
88
+ # s = Polars::Series.new("values", [[nil, 1, nil, 2], [nil], [3, 4]])
89
+ # s.list.drop_nulls
90
+ # # =>
91
+ # # shape: (3,)
92
+ # # Series: 'values' [list[i64]]
93
+ # # [
94
+ # # [1, 2]
95
+ # # []
96
+ # # [3, 4]
97
+ # # ]
98
+ def drop_nulls
99
+ super
100
+ end
101
+
102
+ # Sample from this list.
103
+ #
104
+ # @param n [Integer]
105
+ # Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
106
+ # `fraction` is nil.
107
+ # @param fraction [Float]
108
+ # Fraction of items to return. Cannot be used with `n`.
109
+ # @param with_replacement [Boolean]
110
+ # Allow values to be sampled more than once.
111
+ # @param shuffle [Boolean]
112
+ # Shuffle the order of sampled data points.
113
+ # @param seed [Integer]
114
+ # Seed for the random number generator. If set to nil (default), a
115
+ # random seed is generated for each sample operation.
116
+ #
117
+ # @return [Series]
118
+ #
119
+ # @example
120
+ # s = Polars::Series.new("values", [[1, 2, 3], [4, 5]])
121
+ # s.list.sample(n: Polars::Series.new("n", [2, 1]), seed: 1)
122
+ # # =>
123
+ # # shape: (2,)
124
+ # # Series: 'values' [list[i64]]
125
+ # # [
126
+ # # [2, 1]
127
+ # # [5]
128
+ # # ]
129
+ def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
130
+ super
131
+ end
132
+
31
133
  # Sum all the arrays in the list.
32
134
  #
33
135
  # @return [Series]
@@ -23,9 +23,114 @@ module Polars
23
23
  !(self == other)
24
24
  end
25
25
 
26
+ # Indicate if this expression is the same as another expression.
27
+ #
28
+ # @return [Boolean]
29
+ #
30
+ # @example
31
+ # foo_bar = Polars.col("foo").alias("bar")
32
+ # foo = Polars.col("foo")
33
+ # foo_bar.meta.eq(foo)
34
+ # # => false
35
+ # foo_bar2 = Polars.col("foo").alias("bar")
36
+ # foo_bar.meta.eq(foo_bar2)
37
+ # # => true
38
+ def eq(other)
39
+ _rbexpr.meta_eq(other._rbexpr)
40
+ end
41
+
42
+ # Indicate if this expression is NOT the same as another expression.
43
+ #
44
+ # @return [Boolean]
45
+ #
46
+ # @example
47
+ # foo_bar = Polars.col("foo").alias("bar")
48
+ # foo = Polars.col("foo")
49
+ # foo_bar.meta.ne(foo)
50
+ # # => true
51
+ # foo_bar2 = Polars.col("foo").alias("bar")
52
+ # foo_bar.meta.ne(foo_bar2)
53
+ # # => false
54
+ def ne(other)
55
+ !eq(other)
56
+ end
57
+
58
+ # Indicate if this expression expands into multiple expressions.
59
+ #
60
+ # @return [Boolean]
61
+ #
62
+ # @example
63
+ # e = Polars.col(["a", "b"]).alias("bar")
64
+ # e.meta.has_multiple_outputs
65
+ # # => true
66
+ def has_multiple_outputs
67
+ _rbexpr.meta_has_multiple_outputs
68
+ end
69
+
70
+ # Indicate if this expression is a basic (non-regex) unaliased column.
71
+ #
72
+ # @return [Boolean]
73
+ #
74
+ # @example
75
+ # e = Polars.col("foo")
76
+ # e.meta.is_column
77
+ # # => true
78
+ # e = Polars.col("foo") * Polars.col("bar")
79
+ # e.meta.is_column
80
+ # # => false
81
+ # e = Polars.col('^col.*\d+$')
82
+ # e.meta.is_column
83
+ # # => false
84
+ def is_column
85
+ _rbexpr.meta_is_column
86
+ end
87
+
88
+ # Indicate if this expression expands to columns that match a regex pattern.
89
+ #
90
+ # @return [Boolean]
91
+ #
92
+ # @example
93
+ # e = Polars.col("^.*$").alias("bar")
94
+ # e.meta.is_regex_projection
95
+ # # => true
96
+ def is_regex_projection
97
+ _rbexpr.meta_is_regex_projection
98
+ end
99
+
100
+ # Get the column name that this expression would produce.
101
+ #
102
+ # @return [String]
103
+ #
104
+ # @example
105
+ # e = Polars.col("foo") * Polars.col("bar")
106
+ # e.meta.output_name
107
+ # # => "foo"
108
+ # e_filter = Polars.col("foo").filter(Polars.col("bar") == 13)
109
+ # e_filter.meta.output_name
110
+ # # => "foo"
111
+ # e_sum_over = Polars.sum("foo").over("groups")
112
+ # e_sum_over.meta.output_name
113
+ # # => "foo"
114
+ # e_sum_slice = Polars.sum("foo").slice(Polars.len - 10, Polars.col("bar"))
115
+ # e_sum_slice.meta.output_name
116
+ # # => "foo"
117
+ # Polars.len.meta.output_name
118
+ # # => "len"
119
+ def output_name
120
+ _rbexpr.meta_output_name
121
+ end
122
+
26
123
  # Pop the latest expression and return the input(s) of the popped expression.
27
124
  #
28
125
  # @return [Array]
126
+ #
127
+ # @example
128
+ # e = Polars.col("foo").alias("bar")
129
+ # first = e.meta.pop[0]
130
+ # _ = first.meta == Polars.col("foo")
131
+ # # => true
132
+ # _ = first.meta == Polars.col("bar")
133
+ # # => false
29
134
  def pop
30
135
  _rbexpr.meta_pop.map { |e| Utils.wrap_expr(e) }
31
136
  end
@@ -33,22 +138,85 @@ module Polars
33
138
  # Get a list with the root column name.
34
139
  #
35
140
  # @return [Array]
141
+ #
142
+ # @example
143
+ # e = Polars.col("foo") * Polars.col("bar")
144
+ # e.meta.root_names
145
+ # # => ["foo", "bar"]
146
+ # e_filter = Polars.col("foo").filter(Polars.col("bar") == 13)
147
+ # e_filter.meta.root_names
148
+ # # => ["foo", "bar"]
149
+ # e_sum_over = Polars.sum("foo").over("groups")
150
+ # e_sum_over.meta.root_names
151
+ # # => ["foo", "groups"]
152
+ # e_sum_slice = Polars.sum("foo").slice(Polars.len - 10, Polars.col("bar"))
153
+ # e_sum_slice.meta.root_names
154
+ # # => ["foo", "bar"]
36
155
  def root_names
37
156
  _rbexpr.meta_roots
38
157
  end
39
158
 
40
- # Get the column name that this expression would produce.
41
- #
42
- # @return [String]
43
- def output_name
44
- _rbexpr.meta_output_name
45
- end
46
-
47
159
  # Undo any renaming operation like `alias` or `keep_name`.
48
160
  #
49
161
  # @return [Expr]
162
+ #
163
+ # @example
164
+ # e = Polars.col("foo").alias("bar")
165
+ # _ = e.meta.undo_aliases.meta == Polars.col("foo")
166
+ # # => true
167
+ # e = Polars.col("foo").sum.over("bar")
168
+ # _ = e.name.keep.meta.undo_aliases.meta == e
169
+ # # => true
50
170
  def undo_aliases
51
171
  Utils.wrap_expr(_rbexpr.meta_undo_aliases)
52
172
  end
173
+
174
+ # Turn this expression into a selector.
175
+ #
176
+ # @return [Expr]
177
+ def _as_selector
178
+ Utils.wrap_expr(_rbexpr._meta_as_selector)
179
+ end
180
+
181
+ # Add selectors.
182
+ #
183
+ # @return [Expr]
184
+ def _selector_add(other)
185
+ Utils.wrap_expr(_rbexpr._meta_selector_add(other._rbexpr))
186
+ end
187
+
188
+ # Subtract selectors.
189
+ #
190
+ # @return [Expr]
191
+ def _selector_sub(other)
192
+ Utils.wrap_expr(_rbexpr._meta_selector_sub(other._rbexpr))
193
+ end
194
+
195
+ # Combine selectors with `&` (logical AND).
196
+ #
197
+ # @return [Expr]
198
+ def _selector_and(other)
199
+ Utils.wrap_expr(_rbexpr._meta_selector_and(other._rbexpr))
200
+ end
201
+
202
+ # Format the expression as a tree.
203
+ #
204
+ # @param return_as_string [Boolean]
205
+ # If true, return as string rather than printing to stdout.
206
+ #
207
+ # @return [String]
208
+ #
209
+ # @example
210
+ # e = (Polars.col("foo") * Polars.col("bar")).sum.over(Polars.col("ham")) / 2
211
+ # e.meta.tree_format(return_as_string: true)
212
+ def tree_format(return_as_string: false)
213
+ s = _rbexpr.meta_tree_format
214
+ if return_as_string
215
+ s
216
+ else
217
+ puts s
218
+ nil
219
+ end
220
+ end
53
221
  end
54
222
  end