polars-df 0.10.0-x86_64-linux → 0.12.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/LICENSE-THIRD-PARTY.txt +1125 -865
  5. data/README.md +6 -6
  6. data/lib/polars/3.1/polars.so +0 -0
  7. data/lib/polars/3.2/polars.so +0 -0
  8. data/lib/polars/3.3/polars.so +0 -0
  9. data/lib/polars/array_expr.rb +4 -4
  10. data/lib/polars/batched_csv_reader.rb +11 -5
  11. data/lib/polars/cat_expr.rb +0 -36
  12. data/lib/polars/cat_name_space.rb +0 -37
  13. data/lib/polars/convert.rb +6 -1
  14. data/lib/polars/data_frame.rb +176 -403
  15. data/lib/polars/data_types.rb +1 -1
  16. data/lib/polars/date_time_expr.rb +525 -572
  17. data/lib/polars/date_time_name_space.rb +263 -460
  18. data/lib/polars/dynamic_group_by.rb +5 -5
  19. data/lib/polars/exceptions.rb +7 -0
  20. data/lib/polars/expr.rb +1394 -243
  21. data/lib/polars/expr_dispatch.rb +1 -1
  22. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  23. data/lib/polars/functions/as_datatype.rb +63 -40
  24. data/lib/polars/functions/lazy.rb +63 -14
  25. data/lib/polars/functions/lit.rb +1 -1
  26. data/lib/polars/functions/range/date_range.rb +90 -57
  27. data/lib/polars/functions/range/datetime_range.rb +149 -0
  28. data/lib/polars/functions/range/int_range.rb +2 -2
  29. data/lib/polars/functions/range/time_range.rb +141 -0
  30. data/lib/polars/functions/repeat.rb +1 -1
  31. data/lib/polars/functions/whenthen.rb +1 -1
  32. data/lib/polars/group_by.rb +88 -23
  33. data/lib/polars/io/avro.rb +24 -0
  34. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  35. data/lib/polars/io/database.rb +73 -0
  36. data/lib/polars/io/ipc.rb +247 -0
  37. data/lib/polars/io/json.rb +29 -0
  38. data/lib/polars/io/ndjson.rb +80 -0
  39. data/lib/polars/io/parquet.rb +227 -0
  40. data/lib/polars/lazy_frame.rb +143 -272
  41. data/lib/polars/lazy_group_by.rb +100 -3
  42. data/lib/polars/list_expr.rb +11 -11
  43. data/lib/polars/list_name_space.rb +5 -1
  44. data/lib/polars/rolling_group_by.rb +7 -9
  45. data/lib/polars/series.rb +103 -187
  46. data/lib/polars/string_expr.rb +78 -102
  47. data/lib/polars/string_name_space.rb +5 -4
  48. data/lib/polars/testing.rb +2 -2
  49. data/lib/polars/utils/constants.rb +9 -0
  50. data/lib/polars/utils/convert.rb +97 -0
  51. data/lib/polars/utils/parse.rb +89 -0
  52. data/lib/polars/utils/various.rb +76 -0
  53. data/lib/polars/utils/wrap.rb +19 -0
  54. data/lib/polars/utils.rb +8 -300
  55. data/lib/polars/version.rb +1 -1
  56. data/lib/polars/whenthen.rb +6 -6
  57. data/lib/polars.rb +20 -1
  58. metadata +17 -4
@@ -6,11 +6,108 @@ module Polars
6
6
  @lgb = lgb
7
7
  end
8
8
 
9
- # Describe the aggregation that need to be done on a group.
9
+ # Compute aggregations for each group of a group by operation.
10
+ #
11
+ # @param aggs [Array]
12
+ # Aggregations to compute for each group of the group by operation,
13
+ # specified as positional arguments.
14
+ # Accepts expression input. Strings are parsed as column names.
15
+ # @param named_aggs [Hash]
16
+ # Additional aggregations, specified as keyword arguments.
17
+ # The resulting columns will be renamed to the keyword used.
10
18
  #
11
19
  # @return [LazyFrame]
12
- def agg(aggs)
13
- rbexprs = Utils.selection_to_rbexpr_list(aggs)
20
+ #
21
+ # @example Compute the aggregation of the columns for each group.
22
+ # ldf = Polars::DataFrame.new(
23
+ # {
24
+ # "a" => ["a", "b", "a", "b", "c"],
25
+ # "b" => [1, 2, 1, 3, 3],
26
+ # "c" => [5, 4, 3, 2, 1]
27
+ # }
28
+ # ).lazy
29
+ # ldf.group_by("a").agg(
30
+ # [Polars.col("b"), Polars.col("c")]
31
+ # ).collect
32
+ # # =>
33
+ # # shape: (3, 3)
34
+ # # ┌─────┬───────────┬───────────┐
35
+ # # │ a ┆ b ┆ c │
36
+ # # │ --- ┆ --- ┆ --- │
37
+ # # │ str ┆ list[i64] ┆ list[i64] │
38
+ # # ╞═════╪═══════════╪═══════════╡
39
+ # # │ a ┆ [1, 1] ┆ [5, 3] │
40
+ # # │ b ┆ [2, 3] ┆ [4, 2] │
41
+ # # │ c ┆ [3] ┆ [1] │
42
+ # # └─────┴───────────┴───────────┘
43
+ #
44
+ # @example Compute the sum of a column for each group.
45
+ # ldf.group_by("a").agg(
46
+ # Polars.col("b").sum
47
+ # ).collect
48
+ # # =>
49
+ # # shape: (3, 2)
50
+ # # ┌─────┬─────┐
51
+ # # │ a ┆ b │
52
+ # # │ --- ┆ --- │
53
+ # # │ str ┆ i64 │
54
+ # # ╞═════╪═════╡
55
+ # # │ a ┆ 2 │
56
+ # # │ b ┆ 5 │
57
+ # # │ c ┆ 3 │
58
+ # # └─────┴─────┘
59
+ #
60
+ # @example Compute multiple aggregates at once by passing a list of expressions.
61
+ # ldf.group_by("a").agg(
62
+ # [Polars.sum("b"), Polars.mean("c")]
63
+ # ).collect
64
+ # # =>
65
+ # # shape: (3, 3)
66
+ # # ┌─────┬─────┬─────┐
67
+ # # │ a ┆ b ┆ c │
68
+ # # │ --- ┆ --- ┆ --- │
69
+ # # │ str ┆ i64 ┆ f64 │
70
+ # # ╞═════╪═════╪═════╡
71
+ # # │ c ┆ 3 ┆ 1.0 │
72
+ # # │ a ┆ 2 ┆ 4.0 │
73
+ # # │ b ┆ 5 ┆ 3.0 │
74
+ # # └─────┴─────┴─────┘
75
+ #
76
+ # @example Or use positional arguments to compute multiple aggregations in the same way.
77
+ # ldf.group_by("a").agg(
78
+ # Polars.sum("b").name.suffix("_sum"),
79
+ # (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
80
+ # ).collect
81
+ # # =>
82
+ # # shape: (3, 3)
83
+ # # ┌─────┬───────┬────────────────┐
84
+ # # │ a ┆ b_sum ┆ c_mean_squared │
85
+ # # │ --- ┆ --- ┆ --- │
86
+ # # │ str ┆ i64 ┆ f64 │
87
+ # # ╞═════╪═══════╪════════════════╡
88
+ # # │ a ┆ 2 ┆ 17.0 │
89
+ # # │ c ┆ 3 ┆ 1.0 │
90
+ # # │ b ┆ 5 ┆ 10.0 │
91
+ # # └─────┴───────┴────────────────┘
92
+ #
93
+ # @example Use keyword arguments to easily name your expression inputs.
94
+ # ldf.group_by("a").agg(
95
+ # b_sum: Polars.sum("b"),
96
+ # c_mean_squared: (Polars.col("c") ** 2).mean
97
+ # ).collect
98
+ # # =>
99
+ # # shape: (3, 3)
100
+ # # ┌─────┬───────┬────────────────┐
101
+ # # │ a ┆ b_sum ┆ c_mean_squared │
102
+ # # │ --- ┆ --- ┆ --- │
103
+ # # │ str ┆ i64 ┆ f64 │
104
+ # # ╞═════╪═══════╪════════════════╡
105
+ # # │ a ┆ 2 ┆ 17.0 │
106
+ # # │ c ┆ 3 ┆ 1.0 │
107
+ # # │ b ┆ 5 ┆ 10.0 │
108
+ # # └─────┴───────┴────────────────┘
109
+ def agg(*aggs, **named_aggs)
110
+ rbexprs = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
14
111
  Utils.wrap_ldf(@lgb.agg(rbexprs))
15
112
  end
16
113
 
@@ -146,7 +146,7 @@ module Polars
146
146
  end
147
147
 
148
148
  if !fraction.nil?
149
- fraction = Utils.parse_as_expression(fraction)
149
+ fraction = Utils.parse_into_expression(fraction)
150
150
  return Utils.wrap_expr(
151
151
  _rbexpr.list_sample_fraction(
152
152
  fraction, with_replacement, shuffle, seed
@@ -155,7 +155,7 @@ module Polars
155
155
  end
156
156
 
157
157
  n = 1 if n.nil?
158
- n = Utils.parse_as_expression(n)
158
+ n = Utils.parse_into_expression(n)
159
159
  Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
160
160
  end
161
161
 
@@ -387,7 +387,7 @@ module Polars
387
387
  # # │ 1 │
388
388
  # # └──────┘
389
389
  def get(index, null_on_oob: true)
390
- index = Utils.parse_as_expression(index)
390
+ index = Utils.parse_into_expression(index)
391
391
  Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
392
392
  end
393
393
 
@@ -431,7 +431,7 @@ module Polars
431
431
  if index.is_a?(::Array)
432
432
  index = Series.new(index)
433
433
  end
434
- index = Utils.expr_to_lit_or_expr(index, str_to_lit: false)._rbexpr
434
+ index = Utils.parse_into_expression(index, str_as_lit: false)
435
435
  Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
436
436
  end
437
437
  alias_method :take, :gather
@@ -502,7 +502,7 @@ module Polars
502
502
  # # │ true │
503
503
  # # └───────┘
504
504
  def contains(item)
505
- Utils.wrap_expr(_rbexpr.list_contains(Utils.expr_to_lit_or_expr(item)._rbexpr))
505
+ Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
506
506
  end
507
507
 
508
508
  # Join all string items in a sublist and place a separator between them.
@@ -530,7 +530,7 @@ module Polars
530
530
  # # │ x y │
531
531
  # # └───────┘
532
532
  def join(separator, ignore_nulls: true)
533
- separator = Utils.parse_as_expression(separator, str_as_lit: true)
533
+ separator = Utils.parse_into_expression(separator, str_as_lit: true)
534
534
  Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
535
535
  end
536
536
 
@@ -625,7 +625,7 @@ module Polars
625
625
  # # [null, 10, 2]
626
626
  # # ]
627
627
  def shift(n = 1)
628
- n = Utils.parse_as_expression(n)
628
+ n = Utils.parse_into_expression(n)
629
629
  Utils.wrap_expr(_rbexpr.list_shift(n))
630
630
  end
631
631
 
@@ -650,8 +650,8 @@ module Polars
650
650
  # # [2, 1]
651
651
  # # ]
652
652
  def slice(offset, length = nil)
653
- offset = Utils.expr_to_lit_or_expr(offset, str_to_lit: false)._rbexpr
654
- length = Utils.expr_to_lit_or_expr(length, str_to_lit: false)._rbexpr
653
+ offset = Utils.parse_into_expression(offset, str_as_lit: false)
654
+ length = Utils.parse_into_expression(length, str_as_lit: false)
655
655
  Utils.wrap_expr(_rbexpr.list_slice(offset, length))
656
656
  end
657
657
 
@@ -694,7 +694,7 @@ module Polars
694
694
  # # [2, 1]
695
695
  # # ]
696
696
  def tail(n = 5)
697
- n = Utils.parse_as_expression(n)
697
+ n = Utils.parse_into_expression(n)
698
698
  Utils.wrap_expr(_rbexpr.list_tail(n))
699
699
  end
700
700
 
@@ -722,7 +722,7 @@ module Polars
722
722
  # # │ 0 │
723
723
  # # └────────────────┘
724
724
  def count_matches(element)
725
- Utils.wrap_expr(_rbexpr.list_count_matches(Utils.expr_to_lit_or_expr(element)._rbexpr))
725
+ Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
726
726
  end
727
727
  alias_method :count_match, :count_matches
728
728
 
@@ -197,9 +197,13 @@ module Polars
197
197
  #
198
198
  # @param index [Integer]
199
199
  # Index to return per sublist
200
+ # @param null_on_oob [Boolean]
201
+ # Behavior if an index is out of bounds:
202
+ # true -> set as null
203
+ # false -> raise an error
200
204
  #
201
205
  # @return [Series]
202
- def get(index)
206
+ def get(index, null_on_oob: false)
203
207
  super
204
208
  end
205
209
 
@@ -10,27 +10,25 @@ module Polars
10
10
  period,
11
11
  offset,
12
12
  closed,
13
- by,
14
- check_sorted
13
+ group_by
15
14
  )
16
- period = Utils._timedelta_to_pl_duration(period)
17
- offset = Utils._timedelta_to_pl_duration(offset)
15
+ period = Utils.parse_as_duration_string(period)
16
+ offset = Utils.parse_as_duration_string(offset)
18
17
 
19
18
  @df = df
20
19
  @time_column = index_column
21
20
  @period = period
22
21
  @offset = offset
23
22
  @closed = closed
24
- @by = by
25
- @check_sorted = check_sorted
23
+ @group_by = group_by
26
24
  end
27
25
 
28
- def agg(aggs)
26
+ def agg(*aggs, **named_aggs)
29
27
  @df.lazy
30
28
  .group_by_rolling(
31
- index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
29
+ index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @group_by
32
30
  )
33
- .agg(aggs)
31
+ .agg(*aggs, **named_aggs)
34
32
  .collect(no_optimization: true, string_cache: false)
35
33
  end
36
34
  end