polars-df 0.13.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,181 @@
1
+ module Polars
2
+ # Created by `df.lazy.group_by("foo")`.
3
+ class LazyGroupBy
4
+ # @private
5
+ def initialize(lgb)
6
+ @lgb = lgb
7
+ end
8
+
9
+ # Compute aggregations for each group of a group by operation.
10
+ #
11
+ # @param aggs [Array]
12
+ # Aggregations to compute for each group of the group by operation,
13
+ # specified as positional arguments.
14
+ # Accepts expression input. Strings are parsed as column names.
15
+ # @param named_aggs [Hash]
16
+ # Additional aggregations, specified as keyword arguments.
17
+ # The resulting columns will be renamed to the keyword used.
18
+ #
19
+ # @return [LazyFrame]
20
+ #
21
+ # @example Compute the aggregation of the columns for each group.
22
+ # ldf = Polars::DataFrame.new(
23
+ # {
24
+ # "a" => ["a", "b", "a", "b", "c"],
25
+ # "b" => [1, 2, 1, 3, 3],
26
+ # "c" => [5, 4, 3, 2, 1]
27
+ # }
28
+ # ).lazy
29
+ # ldf.group_by("a").agg(
30
+ # [Polars.col("b"), Polars.col("c")]
31
+ # ).collect
32
+ # # =>
33
+ # # shape: (3, 3)
34
+ # # ┌─────┬───────────┬───────────┐
35
+ # # │ a ┆ b ┆ c │
36
+ # # │ --- ┆ --- ┆ --- │
37
+ # # │ str ┆ list[i64] ┆ list[i64] │
38
+ # # ╞═════╪═══════════╪═══════════╡
39
+ # # │ a ┆ [1, 1] ┆ [5, 3] │
40
+ # # │ b ┆ [2, 3] ┆ [4, 2] │
41
+ # # │ c ┆ [3] ┆ [1] │
42
+ # # └─────┴───────────┴───────────┘
43
+ #
44
+ # @example Compute the sum of a column for each group.
45
+ # ldf.group_by("a").agg(
46
+ # Polars.col("b").sum
47
+ # ).collect
48
+ # # =>
49
+ # # shape: (3, 2)
50
+ # # ┌─────┬─────┐
51
+ # # │ a ┆ b │
52
+ # # │ --- ┆ --- │
53
+ # # │ str ┆ i64 │
54
+ # # ╞═════╪═════╡
55
+ # # │ a ┆ 2 │
56
+ # # │ b ┆ 5 │
57
+ # # │ c ┆ 3 │
58
+ # # └─────┴─────┘
59
+ #
60
+ # @example Compute multiple aggregates at once by passing a list of expressions.
61
+ # ldf.group_by("a").agg(
62
+ # [Polars.sum("b"), Polars.mean("c")]
63
+ # ).collect
64
+ # # =>
65
+ # # shape: (3, 3)
66
+ # # ┌─────┬─────┬─────┐
67
+ # # │ a ┆ b ┆ c │
68
+ # # │ --- ┆ --- ┆ --- │
69
+ # # │ str ┆ i64 ┆ f64 │
70
+ # # ╞═════╪═════╪═════╡
71
+ # # │ c ┆ 3 ┆ 1.0 │
72
+ # # │ a ┆ 2 ┆ 4.0 │
73
+ # # │ b ┆ 5 ┆ 3.0 │
74
+ # # └─────┴─────┴─────┘
75
+ #
76
+ # @example Or use positional arguments to compute multiple aggregations in the same way.
77
+ # ldf.group_by("a").agg(
78
+ # Polars.sum("b").name.suffix("_sum"),
79
+ # (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
80
+ # ).collect
81
+ # # =>
82
+ # # shape: (3, 3)
83
+ # # ┌─────┬───────┬────────────────┐
84
+ # # │ a ┆ b_sum ┆ c_mean_squared │
85
+ # # │ --- ┆ --- ┆ --- │
86
+ # # │ str ┆ i64 ┆ f64 │
87
+ # # ╞═════╪═══════╪════════════════╡
88
+ # # │ a ┆ 2 ┆ 17.0 │
89
+ # # │ c ┆ 3 ┆ 1.0 │
90
+ # # │ b ┆ 5 ┆ 10.0 │
91
+ # # └─────┴───────┴────────────────┘
92
+ #
93
+ # @example Use keyword arguments to easily name your expression inputs.
94
+ # ldf.group_by("a").agg(
95
+ # b_sum: Polars.sum("b"),
96
+ # c_mean_squared: (Polars.col("c") ** 2).mean
97
+ # ).collect
98
+ # # =>
99
+ # # shape: (3, 3)
100
+ # # ┌─────┬───────┬────────────────┐
101
+ # # │ a ┆ b_sum ┆ c_mean_squared │
102
+ # # │ --- ┆ --- ┆ --- │
103
+ # # │ str ┆ i64 ┆ f64 │
104
+ # # ╞═════╪═══════╪════════════════╡
105
+ # # │ a ┆ 2 ┆ 17.0 │
106
+ # # │ c ┆ 3 ┆ 1.0 │
107
+ # # │ b ┆ 5 ┆ 10.0 │
108
+ # # └─────┴───────┴────────────────┘
109
+ def agg(*aggs, **named_aggs)
110
+ rbexprs = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
111
+ Utils.wrap_ldf(@lgb.agg(rbexprs))
112
+ end
113
+
114
+ # Get the first `n` rows of each group.
115
+ #
116
+ # @param n [Integer]
117
+ # Number of rows to return.
118
+ #
119
+ # @return [LazyFrame]
120
+ #
121
+ # @example
122
+ # df = Polars::DataFrame.new(
123
+ # {
124
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
125
+ # "nrs" => [1, 2, 3, 4, 5, 6]
126
+ # }
127
+ # )
128
+ # df.group_by("letters").head(2).sort("letters")
129
+ # # =>
130
+ # # shape: (5, 2)
131
+ # # ┌─────────┬─────┐
132
+ # # │ letters ┆ nrs │
133
+ # # │ --- ┆ --- │
134
+ # # │ str ┆ i64 │
135
+ # # ╞═════════╪═════╡
136
+ # # │ a ┆ 3 │
137
+ # # │ a ┆ 5 │
138
+ # # │ b ┆ 6 │
139
+ # # │ c ┆ 1 │
140
+ # # │ c ┆ 2 │
141
+ # # └─────────┴─────┘
142
+ def head(n = 5)
143
+ Utils.wrap_ldf(@lgb.head(n))
144
+ end
145
+
146
+ # Get the last `n` rows of each group.
147
+ #
148
+ # @param n [Integer]
149
+ # Number of rows to return.
150
+ #
151
+ # @return [LazyFrame]
152
+ #
153
+ # @example
154
+ # df = Polars::DataFrame.new(
155
+ # {
156
+ # "letters" => ["c", "c", "a", "c", "a", "b"],
157
+ # "nrs" => [1, 2, 3, 4, 5, 6]
158
+ # }
159
+ # )
160
+ # df.group_by("letters").tail(2).sort("letters")
161
+ # # =>
162
+ # # shape: (5, 2)
163
+ # # ┌─────────┬─────┐
164
+ # # │ letters ┆ nrs │
165
+ # # │ --- ┆ --- │
166
+ # # │ str ┆ i64 │
167
+ # # ╞═════════╪═════╡
168
+ # # │ a ┆ 3 │
169
+ # # │ a ┆ 5 │
170
+ # # │ b ┆ 6 │
171
+ # # │ c ┆ 2 │
172
+ # # │ c ┆ 4 │
173
+ # # └─────────┴─────┘
174
+ def tail(n = 5)
175
+ Utils.wrap_ldf(@lgb.tail(n))
176
+ end
177
+
178
+ # def apply
179
+ # end
180
+ end
181
+ end