polars-df 0.10.0-x86_64-linux → 0.12.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/LICENSE-THIRD-PARTY.txt +1125 -865
  5. data/README.md +6 -6
  6. data/lib/polars/3.1/polars.so +0 -0
  7. data/lib/polars/3.2/polars.so +0 -0
  8. data/lib/polars/3.3/polars.so +0 -0
  9. data/lib/polars/array_expr.rb +4 -4
  10. data/lib/polars/batched_csv_reader.rb +11 -5
  11. data/lib/polars/cat_expr.rb +0 -36
  12. data/lib/polars/cat_name_space.rb +0 -37
  13. data/lib/polars/convert.rb +6 -1
  14. data/lib/polars/data_frame.rb +176 -403
  15. data/lib/polars/data_types.rb +1 -1
  16. data/lib/polars/date_time_expr.rb +525 -572
  17. data/lib/polars/date_time_name_space.rb +263 -460
  18. data/lib/polars/dynamic_group_by.rb +5 -5
  19. data/lib/polars/exceptions.rb +7 -0
  20. data/lib/polars/expr.rb +1394 -243
  21. data/lib/polars/expr_dispatch.rb +1 -1
  22. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  23. data/lib/polars/functions/as_datatype.rb +63 -40
  24. data/lib/polars/functions/lazy.rb +63 -14
  25. data/lib/polars/functions/lit.rb +1 -1
  26. data/lib/polars/functions/range/date_range.rb +90 -57
  27. data/lib/polars/functions/range/datetime_range.rb +149 -0
  28. data/lib/polars/functions/range/int_range.rb +2 -2
  29. data/lib/polars/functions/range/time_range.rb +141 -0
  30. data/lib/polars/functions/repeat.rb +1 -1
  31. data/lib/polars/functions/whenthen.rb +1 -1
  32. data/lib/polars/group_by.rb +88 -23
  33. data/lib/polars/io/avro.rb +24 -0
  34. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  35. data/lib/polars/io/database.rb +73 -0
  36. data/lib/polars/io/ipc.rb +247 -0
  37. data/lib/polars/io/json.rb +29 -0
  38. data/lib/polars/io/ndjson.rb +80 -0
  39. data/lib/polars/io/parquet.rb +227 -0
  40. data/lib/polars/lazy_frame.rb +143 -272
  41. data/lib/polars/lazy_group_by.rb +100 -3
  42. data/lib/polars/list_expr.rb +11 -11
  43. data/lib/polars/list_name_space.rb +5 -1
  44. data/lib/polars/rolling_group_by.rb +7 -9
  45. data/lib/polars/series.rb +103 -187
  46. data/lib/polars/string_expr.rb +78 -102
  47. data/lib/polars/string_name_space.rb +5 -4
  48. data/lib/polars/testing.rb +2 -2
  49. data/lib/polars/utils/constants.rb +9 -0
  50. data/lib/polars/utils/convert.rb +97 -0
  51. data/lib/polars/utils/parse.rb +89 -0
  52. data/lib/polars/utils/various.rb +76 -0
  53. data/lib/polars/utils/wrap.rb +19 -0
  54. data/lib/polars/utils.rb +8 -300
  55. data/lib/polars/version.rb +1 -1
  56. data/lib/polars/whenthen.rb +6 -6
  57. data/lib/polars.rb +20 -1
  58. metadata +17 -4
@@ -6,11 +6,108 @@ module Polars
6
6
  @lgb = lgb
7
7
  end
8
8
 
9
- # Describe the aggregation that need to be done on a group.
9
+ # Compute aggregations for each group of a group by operation.
10
+ #
11
+ # @param aggs [Array]
12
+ # Aggregations to compute for each group of the group by operation,
13
+ # specified as positional arguments.
14
+ # Accepts expression input. Strings are parsed as column names.
15
+ # @param named_aggs [Hash]
16
+ # Additional aggregations, specified as keyword arguments.
17
+ # The resulting columns will be renamed to the keyword used.
10
18
  #
11
19
  # @return [LazyFrame]
12
- def agg(aggs)
13
- rbexprs = Utils.selection_to_rbexpr_list(aggs)
20
+ #
21
+ # @example Compute the aggregation of the columns for each group.
22
+ # ldf = Polars::DataFrame.new(
23
+ # {
24
+ # "a" => ["a", "b", "a", "b", "c"],
25
+ # "b" => [1, 2, 1, 3, 3],
26
+ # "c" => [5, 4, 3, 2, 1]
27
+ # }
28
+ # ).lazy
29
+ # ldf.group_by("a").agg(
30
+ # [Polars.col("b"), Polars.col("c")]
31
+ # ).collect
32
+ # # =>
33
+ # # shape: (3, 3)
34
+ # # ┌─────┬───────────┬───────────┐
35
+ # # │ a ┆ b ┆ c │
36
+ # # │ --- ┆ --- ┆ --- │
37
+ # # │ str ┆ list[i64] ┆ list[i64] │
38
+ # # ╞═════╪═══════════╪═══════════╡
39
+ # # │ a ┆ [1, 1] ┆ [5, 3] │
40
+ # # │ b ┆ [2, 3] ┆ [4, 2] │
41
+ # # │ c ┆ [3] ┆ [1] │
42
+ # # └─────┴───────────┴───────────┘
43
+ #
44
+ # @example Compute the sum of a column for each group.
45
+ # ldf.group_by("a").agg(
46
+ # Polars.col("b").sum
47
+ # ).collect
48
+ # # =>
49
+ # # shape: (3, 2)
50
+ # # ┌─────┬─────┐
51
+ # # │ a ┆ b │
52
+ # # │ --- ┆ --- │
53
+ # # │ str ┆ i64 │
54
+ # # ╞═════╪═════╡
55
+ # # │ a ┆ 2 │
56
+ # # │ b ┆ 5 │
57
+ # # │ c ┆ 3 │
58
+ # # └─────┴─────┘
59
+ #
60
+ # @example Compute multiple aggregates at once by passing a list of expressions.
61
+ # ldf.group_by("a").agg(
62
+ # [Polars.sum("b"), Polars.mean("c")]
63
+ # ).collect
64
+ # # =>
65
+ # # shape: (3, 3)
66
+ # # ┌─────┬─────┬─────┐
67
+ # # │ a ┆ b ┆ c │
68
+ # # │ --- ┆ --- ┆ --- │
69
+ # # │ str ┆ i64 ┆ f64 │
70
+ # # ╞═════╪═════╪═════╡
71
+ # # │ c ┆ 3 ┆ 1.0 │
72
+ # # │ a ┆ 2 ┆ 4.0 │
73
+ # # │ b ┆ 5 ┆ 3.0 │
74
+ # # └─────┴─────┴─────┘
75
+ #
76
+ # @example Or use positional arguments to compute multiple aggregations in the same way.
77
+ # ldf.group_by("a").agg(
78
+ # Polars.sum("b").name.suffix("_sum"),
79
+ # (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
80
+ # ).collect
81
+ # # =>
82
+ # # shape: (3, 3)
83
+ # # ┌─────┬───────┬────────────────┐
84
+ # # │ a ┆ b_sum ┆ c_mean_squared │
85
+ # # │ --- ┆ --- ┆ --- │
86
+ # # │ str ┆ i64 ┆ f64 │
87
+ # # ╞═════╪═══════╪════════════════╡
88
+ # # │ a ┆ 2 ┆ 17.0 │
89
+ # # │ c ┆ 3 ┆ 1.0 │
90
+ # # │ b ┆ 5 ┆ 10.0 │
91
+ # # └─────┴───────┴────────────────┘
92
+ #
93
+ # @example Use keyword arguments to easily name your expression inputs.
94
+ # ldf.group_by("a").agg(
95
+ # b_sum: Polars.sum("b"),
96
+ # c_mean_squared: (Polars.col("c") ** 2).mean
97
+ # ).collect
98
+ # # =>
99
+ # # shape: (3, 3)
100
+ # # ┌─────┬───────┬────────────────┐
101
+ # # │ a ┆ b_sum ┆ c_mean_squared │
102
+ # # │ --- ┆ --- ┆ --- │
103
+ # # │ str ┆ i64 ┆ f64 │
104
+ # # ╞═════╪═══════╪════════════════╡
105
+ # # │ a ┆ 2 ┆ 17.0 │
106
+ # # │ c ┆ 3 ┆ 1.0 │
107
+ # # │ b ┆ 5 ┆ 10.0 │
108
+ # # └─────┴───────┴────────────────┘
109
+ def agg(*aggs, **named_aggs)
110
+ rbexprs = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
14
111
  Utils.wrap_ldf(@lgb.agg(rbexprs))
15
112
  end
16
113
 
@@ -146,7 +146,7 @@ module Polars
146
146
  end
147
147
 
148
148
  if !fraction.nil?
149
- fraction = Utils.parse_as_expression(fraction)
149
+ fraction = Utils.parse_into_expression(fraction)
150
150
  return Utils.wrap_expr(
151
151
  _rbexpr.list_sample_fraction(
152
152
  fraction, with_replacement, shuffle, seed
@@ -155,7 +155,7 @@ module Polars
155
155
  end
156
156
 
157
157
  n = 1 if n.nil?
158
- n = Utils.parse_as_expression(n)
158
+ n = Utils.parse_into_expression(n)
159
159
  Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
160
160
  end
161
161
 
@@ -387,7 +387,7 @@ module Polars
387
387
  # # │ 1 │
388
388
  # # └──────┘
389
389
  def get(index, null_on_oob: true)
390
- index = Utils.parse_as_expression(index)
390
+ index = Utils.parse_into_expression(index)
391
391
  Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
392
392
  end
393
393
 
@@ -431,7 +431,7 @@ module Polars
431
431
  if index.is_a?(::Array)
432
432
  index = Series.new(index)
433
433
  end
434
- index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
434
+ index = Utils.parse_into_expression(index, str_as_lit: false)
435
435
  Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
436
436
  end
437
437
  alias_method :take, :gather
@@ -502,7 +502,7 @@ module Polars
502
502
  # # │ true │
503
503
  # # └───────┘
504
504
  def contains(item)
505
- Utils.wrap_expr(_rbexpr.list_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
505
+ Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
506
506
  end
507
507
 
508
508
  # Join all string items in a sublist and place a separator between them.
@@ -530,7 +530,7 @@ module Polars
530
530
  # # │ x y │
531
531
  # # └───────┘
532
532
  def join(separator, ignore_nulls: true)
533
- separator = Utils.parse_as_expression(separator, str_as_lit: true)
533
+ separator = Utils.parse_into_expression(separator, str_as_lit: true)
534
534
  Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
535
535
  end
536
536
 
@@ -625,7 +625,7 @@ module Polars
625
625
  # # [null, 10, 2]
626
626
  # # ]
627
627
  def shift(n = 1)
628
- n = Utils.parse_as_expression(n)
628
+ n = Utils.parse_into_expression(n)
629
629
  Utils.wrap_expr(_rbexpr.list_shift(n))
630
630
  end
631
631
 
@@ -650,8 +650,8 @@ module Polars
650
650
  # # [2, 1]
651
651
  # # ]
652
652
  def slice(offset, length = nil)
653
- offset = Utils.expr_to_lit_or_expr(offset, str_to_lit: false)._rbexpr
654
- length = Utils.expr_to_lit_or_expr(length, str_to_lit: false)._rbexpr
653
+ offset = Utils.parse_into_expression(offset, str_as_lit: false)
654
+ length = Utils.parse_into_expression(length, str_as_lit: false)
655
655
  Utils.wrap_expr(_rbexpr.list_slice(offset, length))
656
656
  end
657
657
 
@@ -694,7 +694,7 @@ module Polars
694
694
  # # [2, 1]
695
695
  # # ]
696
696
  def tail(n = 5)
697
- n = Utils.parse_as_expression(n)
697
+ n = Utils.parse_into_expression(n)
698
698
  Utils.wrap_expr(_rbexpr.list_tail(n))
699
699
  end
700
700
 
@@ -722,7 +722,7 @@ module Polars
722
722
  # # │ 0 │
723
723
  # # └────────────────┘
724
724
  def count_matches(element)
725
- Utils.wrap_expr(_rbexpr.list_count_matches(Utils.expr_to_lit_or_expr(element)._rbexpr))
725
+ Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
726
726
  end
727
727
  alias_method :count_match, :count_matches
728
728
 
@@ -197,9 +197,13 @@ module Polars
197
197
  #
198
198
  # @param index [Integer]
199
199
  # Index to return per sublist
200
+ # @param null_on_oob [Boolean]
201
+ # Behavior if an index is out of bounds:
202
+ # true -> set as null
203
+ # false -> raise an error
200
204
  #
201
205
  # @return [Series]
202
- def get(index)
206
+ def get(index, null_on_oob: false)
203
207
  super
204
208
  end
205
209
 
@@ -10,27 +10,25 @@ module Polars
10
10
  period,
11
11
  offset,
12
12
  closed,
13
- by,
14
- check_sorted
13
+ group_by
15
14
  )
16
- period = Utils._timedelta_to_pl_duration(period)
17
- offset = Utils._timedelta_to_pl_duration(offset)
15
+ period = Utils.parse_as_duration_string(period)
16
+ offset = Utils.parse_as_duration_string(offset)
18
17
 
19
18
  @df = df
20
19
  @time_column = index_column
21
20
  @period = period
22
21
  @offset = offset
23
22
  @closed = closed
24
- @by = by
25
- @check_sorted = check_sorted
23
+ @group_by = group_by
26
24
  end
27
25
 
28
- def agg(aggs)
26
+ def agg(*aggs, **named_aggs)
29
27
  @df.lazy
30
28
  .group_by_rolling(
31
- index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
29
+ index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @group_by
32
30
  )
33
- .agg(aggs)
31
+ .agg(*aggs, **named_aggs)
34
32
  .collect(no_optimization: true, string_cache: false)
35
33
  end
36
34
  end