polars-df 0.2.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +33 -0
  4. data/Cargo.lock +2230 -0
  5. data/Cargo.toml +10 -0
  6. data/LICENSE-THIRD-PARTY.txt +38856 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +91 -0
  9. data/lib/polars/3.0/polars.bundle +0 -0
  10. data/lib/polars/3.1/polars.bundle +0 -0
  11. data/lib/polars/3.2/polars.bundle +0 -0
  12. data/lib/polars/batched_csv_reader.rb +96 -0
  13. data/lib/polars/cat_expr.rb +52 -0
  14. data/lib/polars/cat_name_space.rb +54 -0
  15. data/lib/polars/convert.rb +100 -0
  16. data/lib/polars/data_frame.rb +4833 -0
  17. data/lib/polars/data_types.rb +122 -0
  18. data/lib/polars/date_time_expr.rb +1418 -0
  19. data/lib/polars/date_time_name_space.rb +1484 -0
  20. data/lib/polars/dynamic_group_by.rb +52 -0
  21. data/lib/polars/exceptions.rb +20 -0
  22. data/lib/polars/expr.rb +5307 -0
  23. data/lib/polars/expr_dispatch.rb +22 -0
  24. data/lib/polars/functions.rb +453 -0
  25. data/lib/polars/group_by.rb +558 -0
  26. data/lib/polars/io.rb +814 -0
  27. data/lib/polars/lazy_frame.rb +2442 -0
  28. data/lib/polars/lazy_functions.rb +1195 -0
  29. data/lib/polars/lazy_group_by.rb +93 -0
  30. data/lib/polars/list_expr.rb +610 -0
  31. data/lib/polars/list_name_space.rb +346 -0
  32. data/lib/polars/meta_expr.rb +54 -0
  33. data/lib/polars/rolling_group_by.rb +35 -0
  34. data/lib/polars/series.rb +3730 -0
  35. data/lib/polars/slice.rb +104 -0
  36. data/lib/polars/string_expr.rb +972 -0
  37. data/lib/polars/string_name_space.rb +690 -0
  38. data/lib/polars/struct_expr.rb +100 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +192 -0
  41. data/lib/polars/version.rb +4 -0
  42. data/lib/polars/when.rb +16 -0
  43. data/lib/polars/when_then.rb +19 -0
  44. data/lib/polars-df.rb +1 -0
  45. data/lib/polars.rb +50 -0
  46. metadata +89 -0
@@ -0,0 +1,346 @@
1
+ module Polars
2
+ # Namespace for list operations on a Series, reached as `Series#arr`.
3
+ class ListNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "arr" # NOTE(review): `_accessor=` is not defined here; presumably provided by ExprDispatch - confirm
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Get the length of the arrays as UInt32.
14
+ #
15
+ # @return [Series]
16
+ #
17
+ # @example
18
+ # s = Polars::Series.new([[1, 2, 3], [5]])
19
+ # s.arr.lengths
20
+ # # =>
21
+ # # shape: (2,)
22
+ # # Series: '' [u32]
23
+ # # [
24
+ # # 3
25
+ # # 1
26
+ # # ]
27
+ def lengths
28
+ super # NOTE(review): no superclass is visible here; `super` presumably resolves through ExprDispatch - confirm
29
+ end
30
+
31
+ # Sum all the arrays in the list.
32
+ #
33
+ # @return [Series]
34
+ def sum
35
+ super
36
+ end
37
+
38
+ # Compute the max value of the arrays in the list.
39
+ #
40
+ # @return [Series]
41
+ def max
42
+ super
43
+ end
44
+
45
+ # Compute the min value of the arrays in the list.
46
+ #
47
+ # @return [Series]
48
+ def min
49
+ super
50
+ end
51
+
52
+ # Compute the mean value of the arrays in the list.
53
+ #
54
+ # @return [Series]
55
+ def mean
56
+ super
57
+ end
58
+
59
+ # Sort the arrays in the list.
60
+ #
61
+ # @return [Series]
62
+ def sort(reverse: false)
63
+ super
64
+ end
65
+
66
+ # Reverse the arrays in the list.
67
+ #
68
+ # @return [Series]
69
+ def reverse
70
+ super
71
+ end
72
+
73
+ # Get the unique/distinct values in the list.
74
+ #
75
+ # @return [Series]
76
+ def unique
77
+ super
78
+ end
79
+
80
+ # Concatenate the arrays in a Series of dtype List in linear time.
81
+ #
82
+ # @param other [Object]
83
+ # Columns to concatenate into a List Series.
84
+ #
85
+ # @return [Series]
86
+ def concat(other)
87
+ super
88
+ end
89
+
90
+ # Get the value by index in the sublists.
91
+ #
92
+ # Index `0` returns the first item of every sublist
93
+ # and index `-1` returns the last item of every sublist.
94
+ # If an index is out of bounds, the result for that sublist is `null`.
95
+ #
96
+ # @param index [Integer]
97
+ # Index to return per sublist
98
+ #
99
+ # @return [Series]
100
+ def get(index)
101
+ super
102
+ end
103
+
104
+ # Get the value by index in the sublists (delegates to `get`).
105
+ #
106
+ # @return [Series]
107
+ def [](item)
108
+ get(item)
109
+ end
110
+
111
+ # Join all string items in a sublist and place a separator between them.
112
+ #
113
+ # This errors if the inner type of the list is not `Utf8`.
114
+ #
115
+ # @param separator [String]
116
+ # String to separate the items with.
117
+ #
118
+ # @return [Series]
119
+ #
120
+ # @example
121
+ # s = Polars::Series.new([["foo", "bar"], ["hello", "world"]])
122
+ # s.arr.join("-")
123
+ # # =>
124
+ # # shape: (2,)
125
+ # # Series: '' [str]
126
+ # # [
127
+ # # "foo-bar"
128
+ # # "hello-world"
129
+ # # ]
130
+ def join(separator)
131
+ super
132
+ end
133
+
134
+ # Get the first value of the sublists.
135
+ #
136
+ # @return [Series]
137
+ def first
138
+ super
139
+ end
140
+
141
+ # Get the last value of the sublists.
142
+ #
143
+ # @return [Series]
144
+ def last
145
+ super
146
+ end
147
+
148
+ # Check if sublists contain the given item.
149
+ #
150
+ # @param item [Object]
151
+ # Item that will be checked for membership.
152
+ #
153
+ # @return [Series]
154
+ def contains(item)
155
+ super
156
+ end
157
+
158
+ # Retrieve the index of the minimal value in every sublist.
159
+ #
160
+ # @return [Series]
161
+ def arg_min
162
+ super
163
+ end
164
+
165
+ # Retrieve the index of the maximum value in every sublist.
166
+ #
167
+ # @return [Series]
168
+ def arg_max
169
+ super
170
+ end
171
+
172
+ # Calculate the n-th discrete difference of every sublist.
173
+ #
174
+ # @param n [Integer]
175
+ # Number of slots to shift.
176
+ # @param null_behavior ["ignore", "drop"]
177
+ # How to handle null values.
178
+ #
179
+ # @return [Series]
180
+ #
181
+ # @example
182
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
183
+ # s.arr.diff
184
+ # # =>
185
+ # # shape: (2,)
186
+ # # Series: 'a' [list]
187
+ # # [
188
+ # # [null, 1, ... 1]
189
+ # # [null, -8, -1]
190
+ # # ]
191
+ def diff(n: 1, null_behavior: "ignore")
192
+ super
193
+ end
194
+
195
+ # Shift values by the given period.
196
+ #
197
+ # @param periods [Integer]
198
+ # Number of places to shift (may be negative).
199
+ #
200
+ # @return [Series]
201
+ #
202
+ # @example
203
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
204
+ # s.arr.shift
205
+ # # =>
206
+ # # shape: (2,)
207
+ # # Series: 'a' [list]
208
+ # # [
209
+ # # [null, 1, ... 3]
210
+ # # [null, 10, 2]
211
+ # # ]
212
+ def shift(periods = 1)
213
+ super
214
+ end
215
+
216
+ # Slice every sublist.
217
+ #
218
+ # @param offset [Integer]
219
+ # Start index. Negative indexing is supported.
220
+ # @param length [Integer]
221
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
222
+ # end of the list.
223
+ #
224
+ # @return [Series]
225
+ #
226
+ # @example
227
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
228
+ # s.arr.slice(1, 2)
229
+ # # =>
230
+ # # shape: (2,)
231
+ # # Series: 'a' [list]
232
+ # # [
233
+ # # [2, 3]
234
+ # # [2, 1]
235
+ # # ]
236
+ def slice(offset, length = nil)
237
+ super
238
+ end
239
+
240
+ # Slice the first `n` values of every sublist.
241
+ #
242
+ # @param n [Integer]
243
+ # Number of values to return for each sublist.
244
+ #
245
+ # @return [Series]
246
+ #
247
+ # @example
248
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
249
+ # s.arr.head(2)
250
+ # # =>
251
+ # # shape: (2,)
252
+ # # Series: 'a' [list]
253
+ # # [
254
+ # # [1, 2]
255
+ # # [10, 2]
256
+ # # ]
257
+ def head(n = 5)
258
+ super
259
+ end
260
+
261
+ # Slice the last `n` values of every sublist.
262
+ #
263
+ # @param n [Integer]
264
+ # Number of values to return for each sublist.
265
+ #
266
+ # @return [Series]
267
+ #
268
+ # @example
269
+ # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
270
+ # s.arr.tail(2)
271
+ # # =>
272
+ # # shape: (2,)
273
+ # # Series: 'a' [list]
274
+ # # [
275
+ # # [3, 4]
276
+ # # [2, 1]
277
+ # # ]
278
+ def tail(n = 5)
279
+ super
280
+ end
281
+
282
+ # Convert the series of type `List` to a series of type `Struct`.
283
+ #
284
+ # @param n_field_strategy ["first_non_null", "max_width"]
285
+ # Strategy to determine the number of fields of the struct.
286
+ # @param name_generator [Object]
287
+ # A custom function that can be used to generate the field names.
288
+ # Default field names are `field_0, field_1 .. field_n`
289
+ #
290
+ # @return [Series]
291
+ #
292
+ # @example
293
+ # df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
294
+ # df.select([Polars.col("a").arr.to_struct])
295
+ # # =>
296
+ # # shape: (2, 1)
297
+ # # ┌────────────┐
298
+ # # │ a │
299
+ # # │ --- │
300
+ # # │ struct[3] │
301
+ # # ╞════════════╡
302
+ # # │ {1,2,3} │
303
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
304
+ # # │ {1,2,null} │
305
+ # # └────────────┘
306
+ def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
307
+ super
308
+ end
309
+
310
+ # Run any polars expression against the lists' elements.
311
+ #
312
+ # @param expr [Expr]
313
+ # Expression to run. Note that you can select an element with `Polars.first`, or
314
+ # `Polars.col`.
315
+ # @param parallel [Boolean]
316
+ # Run all expressions in parallel. Don't activate this blindly.
317
+ # Parallelism is worth it if there is enough work to do per thread.
318
+ #
319
+ # This likely should not be used in the groupby context, because we already
320
+ # do parallel execution per group.
321
+ #
322
+ # @return [Series]
323
+ #
324
+ # @example
325
+ # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
326
+ # df.with_column(
327
+ # Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
328
+ # )
329
+ # # =>
330
+ # # shape: (3, 3)
331
+ # # ┌─────┬─────┬────────────┐
332
+ # # │ a ┆ b ┆ rank │
333
+ # # │ --- ┆ --- ┆ --- │
334
+ # # │ i64 ┆ i64 ┆ list[f32] │
335
+ # # ╞═════╪═════╪════════════╡
336
+ # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
337
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
338
+ # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
339
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
340
+ # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
341
+ # # └─────┴─────┴────────────┘
342
+ def eval(expr, parallel: false)
343
+ super
344
+ end
345
+ end
346
+ end
@@ -0,0 +1,54 @@
1
+ module Polars
2
+ # Namespace for expressions on a meta level.
3
+ class MetaExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Check whether this expression is equal to `other` (delegates to `meta_eq`).
13
+ #
14
+ # @return [Boolean]
15
+ def ==(other)
16
+ _rbexpr.meta_eq(other._rbexpr) # NOTE(review): raises NoMethodError when `other` has no `_rbexpr` (e.g. nil)
17
+ end
18
+
19
+ # Check whether this expression differs from `other` (negation of `==`).
20
+ #
21
+ # @return [Boolean]
22
+ def !=(other)
23
+ !(self == other)
24
+ end
25
+
26
+ # Pop the latest expression and return the input(s) of the popped expression.
27
+ #
28
+ # @return [Array]
29
+ def pop
30
+ _rbexpr.meta_pop.map { |e| Utils.wrap_expr(e) } # each popped input is wrapped via Utils.wrap_expr
31
+ end
32
+
33
+ # Get a list with the root column names.
34
+ #
35
+ # @return [Array]
36
+ def root_names
37
+ _rbexpr.meta_roots
38
+ end
39
+
40
+ # Get the column name that this expression would produce.
41
+ #
42
+ # @return [String]
43
+ def output_name
44
+ _rbexpr.meta_output_name
45
+ end
46
+
47
+ # Undo any renaming operation like `alias` or `keep_name`.
48
+ #
49
+ # @return [Expr]
50
+ def undo_aliases
51
+ Utils.wrap_expr(_rbexpr.meta_undo_aliases)
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,35 @@
1
+ module Polars
2
+ # A rolling grouper: stores a frame and its rolling-window settings for a later `agg` call.
3
+ #
4
+ # This has an `.agg` method which allows you to run all polars expressions in a
5
+ # groupby context.
6
+ class RollingGroupBy
7
+ def initialize(
8
+ df,
9
+ index_column,
10
+ period,
11
+ offset,
12
+ closed,
13
+ by
14
+ )
15
+ period = Utils._timedelta_to_pl_duration(period) # NOTE(review): presumably normalizes to a Polars duration - confirm in Utils
16
+ offset = Utils._timedelta_to_pl_duration(offset)
17
+
18
+ @df = df
19
+ @time_column = index_column # stored under a different name; passed back as `index_column:` in `agg`
20
+ @period = period
21
+ @offset = offset
22
+ @closed = closed
23
+ @by = by
24
+ end
25
+
26
+ def agg(aggs) # `aggs` is forwarded unchanged to the lazy frame's `agg`
27
+ @df.lazy
28
+ .groupby_rolling(
29
+ index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by
30
+ )
31
+ .agg(aggs)
32
+ .collect(no_optimization: true, string_cache: false) # collected with optimization and the string cache switched off
33
+ end
34
+ end
35
+ end