polars-df 0.13.0-x64-mingw-ucrt

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,181 @@
1
module Polars
  # Created by `df.lazy.group_by("foo")`.
  #
  # Thin wrapper around a lower-level lazy group-by object: every public
  # method forwards to that object and wraps the result with `Utils.wrap_ldf`.
  class LazyGroupBy
    # @private
    #
    # @param lgb [Object]
    #   The wrapped lazy group-by object. It must respond to `agg`, `head`
    #   and `tail`, which are the only calls this class makes on it.
    #   NOTE(review): presumably the native (Rust-backed) handle - confirm
    #   against the caller that builds this object.
    def initialize(lgb)
      @lgb = lgb
    end

    # Compute aggregations for each group of a group by operation.
    #
    # @param aggs [Array]
    #   Aggregations to compute for each group of the group by operation,
    #   specified as positional arguments.
    #   Accepts expression input. Strings are parsed as column names.
    # @param named_aggs [Hash]
    #   Additional aggregations, specified as keyword arguments.
    #   The resulting columns will be renamed to the keyword used.
    #
    # @return [LazyFrame]
    #
    # @example Compute the aggregation of the columns for each group.
    #   ldf = Polars::DataFrame.new(
    #     {
    #       "a" => ["a", "b", "a", "b", "c"],
    #       "b" => [1, 2, 1, 3, 3],
    #       "c" => [5, 4, 3, 2, 1]
    #     }
    #   ).lazy
    #   ldf.group_by("a").agg(
    #     [Polars.col("b"), Polars.col("c")]
    #   ).collect
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬───────────┬───────────┐
    #   # │ a   ┆ b         ┆ c         │
    #   # │ --- ┆ ---       ┆ ---       │
    #   # │ str ┆ list[i64] ┆ list[i64] │
    #   # ╞═════╪═══════════╪═══════════╡
    #   # │ a   ┆ [1, 1]    ┆ [5, 3]    │
    #   # │ b   ┆ [2, 3]    ┆ [4, 2]    │
    #   # │ c   ┆ [3]       ┆ [1]       │
    #   # └─────┴───────────┴───────────┘
    #
    # @example Compute the sum of a column for each group.
    #   ldf.group_by("a").agg(
    #     Polars.col("b").sum
    #   ).collect
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ str ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ a   ┆ 2   │
    #   # │ b   ┆ 5   │
    #   # │ c   ┆ 3   │
    #   # └─────┴─────┘
    #
    # @example Compute multiple aggregates at once by passing a list of expressions.
    #   ldf.group_by("a").agg(
    #     [Polars.sum("b"), Polars.mean("c")]
    #   ).collect
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬─────┐
    #   # │ a   ┆ b   ┆ c   │
    #   # │ --- ┆ --- ┆ --- │
    #   # │ str ┆ i64 ┆ f64 │
    #   # ╞═════╪═════╪═════╡
    #   # │ c   ┆ 3   ┆ 1.0 │
    #   # │ a   ┆ 2   ┆ 4.0 │
    #   # │ b   ┆ 5   ┆ 3.0 │
    #   # └─────┴─────┴─────┘
    #
    # @example Or use positional arguments to compute multiple aggregations in the same way.
    #   ldf.group_by("a").agg(
    #     Polars.sum("b").name.suffix("_sum"),
    #     (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
    #   ).collect
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬───────┬────────────────┐
    #   # │ a   ┆ b_sum ┆ c_mean_squared │
    #   # │ --- ┆ ---   ┆ ---            │
    #   # │ str ┆ i64   ┆ f64            │
    #   # ╞═════╪═══════╪════════════════╡
    #   # │ a   ┆ 2     ┆ 17.0           │
    #   # │ c   ┆ 3     ┆ 1.0            │
    #   # │ b   ┆ 5     ┆ 10.0           │
    #   # └─────┴───────┴────────────────┘
    #
    # @example Use keyword arguments to easily name your expression inputs.
    #   ldf.group_by("a").agg(
    #     b_sum: Polars.sum("b"),
    #     c_mean_squared: (Polars.col("c") ** 2).mean
    #   ).collect
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬───────┬────────────────┐
    #   # │ a   ┆ b_sum ┆ c_mean_squared │
    #   # │ --- ┆ ---   ┆ ---            │
    #   # │ str ┆ i64   ┆ f64            │
    #   # ╞═════╪═══════╪════════════════╡
    #   # │ a   ┆ 2     ┆ 17.0           │
    #   # │ c   ┆ 3     ┆ 1.0            │
    #   # │ b   ┆ 5     ┆ 10.0           │
    #   # └─────┴───────┴────────────────┘
    def agg(*aggs, **named_aggs)
      # Positional and keyword inputs are normalised into one expression list
      # before being handed to the wrapped object.
      rbexprs = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
      Utils.wrap_ldf(@lgb.agg(rbexprs))
    end

    # Get the first `n` rows of each group.
    #
    # @param n [Integer]
    #   Number of rows to return.
    #
    # @return [LazyFrame]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "letters" => ["c", "c", "a", "c", "a", "b"],
    #       "nrs" => [1, 2, 3, 4, 5, 6]
    #     }
    #   )
    #   df.lazy.group_by("letters").head(2).sort("letters").collect
    #   # =>
    #   # shape: (5, 2)
    #   # ┌─────────┬─────┐
    #   # │ letters ┆ nrs │
    #   # │ ---     ┆ --- │
    #   # │ str     ┆ i64 │
    #   # ╞═════════╪═════╡
    #   # │ a       ┆ 3   │
    #   # │ a       ┆ 5   │
    #   # │ b       ┆ 6   │
    #   # │ c       ┆ 1   │
    #   # │ c       ┆ 2   │
    #   # └─────────┴─────┘
    def head(n = 5)
      Utils.wrap_ldf(@lgb.head(n))
    end

    # Get the last `n` rows of each group.
    #
    # @param n [Integer]
    #   Number of rows to return.
    #
    # @return [LazyFrame]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "letters" => ["c", "c", "a", "c", "a", "b"],
    #       "nrs" => [1, 2, 3, 4, 5, 6]
    #     }
    #   )
    #   df.lazy.group_by("letters").tail(2).sort("letters").collect
    #   # =>
    #   # shape: (5, 2)
    #   # ┌─────────┬─────┐
    #   # │ letters ┆ nrs │
    #   # │ ---     ┆ --- │
    #   # │ str     ┆ i64 │
    #   # ╞═════════╪═════╡
    #   # │ a       ┆ 3   │
    #   # │ a       ┆ 5   │
    #   # │ b       ┆ 6   │
    #   # │ c       ┆ 2   │
    #   # │ c       ┆ 4   │
    #   # └─────────┴─────┘
    def tail(n = 5)
      Utils.wrap_ldf(@lgb.tail(n))
    end

    # def apply
    # end
  end
end