polars-df 0.2.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +33 -0
  4. data/Cargo.lock +2230 -0
  5. data/Cargo.toml +10 -0
  6. data/LICENSE-THIRD-PARTY.txt +38828 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +91 -0
  9. data/lib/polars/3.0/polars.so +0 -0
  10. data/lib/polars/3.1/polars.so +0 -0
  11. data/lib/polars/3.2/polars.so +0 -0
  12. data/lib/polars/batched_csv_reader.rb +96 -0
  13. data/lib/polars/cat_expr.rb +52 -0
  14. data/lib/polars/cat_name_space.rb +54 -0
  15. data/lib/polars/convert.rb +100 -0
  16. data/lib/polars/data_frame.rb +4833 -0
  17. data/lib/polars/data_types.rb +122 -0
  18. data/lib/polars/date_time_expr.rb +1418 -0
  19. data/lib/polars/date_time_name_space.rb +1484 -0
  20. data/lib/polars/dynamic_group_by.rb +52 -0
  21. data/lib/polars/exceptions.rb +20 -0
  22. data/lib/polars/expr.rb +5307 -0
  23. data/lib/polars/expr_dispatch.rb +22 -0
  24. data/lib/polars/functions.rb +453 -0
  25. data/lib/polars/group_by.rb +558 -0
  26. data/lib/polars/io.rb +814 -0
  27. data/lib/polars/lazy_frame.rb +2442 -0
  28. data/lib/polars/lazy_functions.rb +1195 -0
  29. data/lib/polars/lazy_group_by.rb +93 -0
  30. data/lib/polars/list_expr.rb +610 -0
  31. data/lib/polars/list_name_space.rb +346 -0
  32. data/lib/polars/meta_expr.rb +54 -0
  33. data/lib/polars/rolling_group_by.rb +35 -0
  34. data/lib/polars/series.rb +3730 -0
  35. data/lib/polars/slice.rb +104 -0
  36. data/lib/polars/string_expr.rb +972 -0
  37. data/lib/polars/string_name_space.rb +690 -0
  38. data/lib/polars/struct_expr.rb +100 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +192 -0
  41. data/lib/polars/version.rb +4 -0
  42. data/lib/polars/when.rb +16 -0
  43. data/lib/polars/when_then.rb +19 -0
  44. data/lib/polars-df.rb +1 -0
  45. data/lib/polars.rb +50 -0
  46. metadata +89 -0
@@ -0,0 +1,346 @@
module Polars
  # List operations for a Series, reached through `Series#arr`.
  #
  # NOTE(review): every public method below is a bare `super` call, so the
  # actual work is presumably carried out by the dispatch logic mixed in from
  # ExprDispatch (defined outside this file) -- confirm there before changing
  # any signature.
  class ListNameSpace
    include ExprDispatch

    self._accessor = "arr"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Length of every sublist, returned as UInt32.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new([[1, 2, 3], [5]])
    #   s.arr.lengths
    #   # =>
    #   # shape: (2,)
    #   # Series: '' [u32]
    #   # [
    #   #         3
    #   #         1
    #   # ]
    def lengths
      super
    end

    # Sum of the values in every sublist.
    #
    # @return [Series]
    def sum
      super
    end

    # Largest value of every sublist.
    #
    # @return [Series]
    def max
      super
    end

    # Smallest value of every sublist.
    #
    # @return [Series]
    def min
      super
    end

    # Mean value of every sublist.
    #
    # @return [Series]
    def mean
      super
    end

    # Sort the values inside every sublist.
    #
    # @param reverse [Boolean]
    #   Forwarded unchanged through `super`; presumably requests descending
    #   order when `true` (not verifiable from this file).
    #
    # @return [Series]
    def sort(reverse: false)
      super
    end

    # Reverse the order of the values inside every sublist.
    #
    # @return [Series]
    def reverse
      super
    end

    # Keep only the unique/distinct values of every sublist.
    #
    # @return [Series]
    def unique
      super
    end

    # Concatenate the arrays of a List-typed Series in linear time.
    #
    # @param other [Object]
    #   Columns to concat into a List Series
    #
    # @return [Series]
    def concat(other)
      super
    end

    # Pick one value per sublist by position.
    #
    # Index `0` yields the first item of every sublist and index `-1` the
    # last one. An index that is out of bounds yields a null (`nil`) for
    # that sublist.
    #
    # @param index [Integer]
    #   Index to return per sublist
    #
    # @return [Series]
    def get(index)
      super
    end

    # Bracket form of {#get}: pick one value per sublist by position.
    #
    # @param item [Integer]
    #   Index to return per sublist; passed straight to {#get}.
    #
    # @return [Series]
    def [](item)
      get(item)
    end

    # Join the string items of every sublist, inserting a separator between
    # them.
    #
    # This errors if the inner type of the list is not `Utf8`.
    #
    # @param separator [String]
    #   string to separate the items with
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new([["foo", "bar"], ["hello", "world"]])
    #   s.arr.join("-")
    #   # =>
    #   # shape: (2,)
    #   # Series: '' [str]
    #   # [
    #   #         "foo-bar"
    #   #         "hello-world"
    #   # ]
    def join(separator)
      super
    end

    # First value of every sublist.
    #
    # @return [Series]
    def first
      super
    end

    # Last value of every sublist.
    #
    # @return [Series]
    def last
      super
    end

    # Test every sublist for membership of the given item.
    #
    # @param item [Object]
    #   Item that will be checked for membership.
    #
    # @return [Series]
    def contains(item)
      super
    end

    # Index of the minimal value in every sublist.
    #
    # @return [Series]
    def arg_min
      super
    end

    # Index of the maximum value in every sublist.
    #
    # @return [Series]
    def arg_max
      super
    end

    # n-th discrete difference of every sublist.
    #
    # @param n [Integer]
    #   Number of slots to shift.
    # @param null_behavior ["ignore", "drop"]
    #   How to handle null values.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.arr.diff
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list]
    #   # [
    #   #         [null, 1, ... 1]
    #   #         [null, -8, -1]
    #   # ]
    def diff(n: 1, null_behavior: "ignore")
      super
    end

    # Shift the values of every sublist by the given period.
    #
    # @param periods [Integer]
    #   Number of places to shift (may be negative).
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.arr.shift
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list]
    #   # [
    #   #         [null, 1, ... 3]
    #   #         [null, 10, 2]
    #   # ]
    def shift(periods = 1)
      super
    end

    # Take a slice out of every sublist.
    #
    # @param offset [Integer]
    #   Start index. Negative indexing is supported.
    # @param length [Integer]
    #   Length of the slice. When left at `nil` (the default), the slice runs
    #   to the end of the list.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.arr.slice(1, 2)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list]
    #   # [
    #   #         [2, 3]
    #   #         [2, 1]
    #   # ]
    def slice(offset, length = nil)
      super
    end

    # Keep the first `n` values of every sublist.
    #
    # @param n [Integer]
    #   Number of values to return for each sublist.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.arr.head(2)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list]
    #   # [
    #   #         [1, 2]
    #   #         [10, 2]
    #   # ]
    def head(n = 5)
      super
    end

    # Keep the last `n` values of every sublist.
    #
    # @param n [Integer]
    #   Number of values to return for each sublist.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
    #   s.arr.tail(2)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [list]
    #   # [
    #   #         [3, 4]
    #   #         [2, 1]
    #   # ]
    def tail(n = 5)
      super
    end

    # Turn a Series of type `List` into a Series of type `Struct`.
    #
    # @param n_field_strategy ["first_non_null", "max_width"]
    #   Strategy to determine the number of fields of the struct.
    # @param name_generator [Object]
    #   A custom function that can be used to generate the field names.
    #   Default field names are `field_0, field_1 .. field_n`
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
    #   df.select([Polars.col("a").arr.to_struct])
    #   # =>
    #   # shape: (2, 1)
    #   # ┌────────────┐
    #   # │ a          │
    #   # │ ---        │
    #   # │ struct[3]  │
    #   # ╞════════════╡
    #   # │ {1,2,3}    │
    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
    #   # │ {1,2,null} │
    #   # └────────────┘
    def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
      super
    end

    # Run any polars expression against the elements of the lists.
    #
    # @param expr [Expr]
    #   Expression to run. Note that you can select an element with
    #   `Polars.first`, or `Polars.col`
    # @param parallel [Boolean]
    #   Run all expressions in parallel. Don't activate this blindly.
    #   Parallelism is worth it if there is enough work to do per thread.
    #
    #   This likely should not be used in the groupby context, because
    #   execution is already parallel per group there.
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
    #   df.with_column(
    #     Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
    #   )
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬────────────┐
    #   # │ a   ┆ b   ┆ rank       │
    #   # │ --- ┆ --- ┆ ---        │
    #   # │ i64 ┆ i64 ┆ list[f32]  │
    #   # ╞═════╪═════╪════════════╡
    #   # │ 1   ┆ 4   ┆ [1.0, 2.0] │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    #   # │ 8   ┆ 5   ┆ [2.0, 1.0] │
    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    #   # │ 3   ┆ 2   ┆ [2.0, 1.0] │
    #   # └─────┴─────┴────────────┘
    def eval(expr, parallel: false)
      super
    end
  end
end
@@ -0,0 +1,54 @@
module Polars
  # Namespace for expressions on a meta level.
  #
  # Wraps the low-level `_rbexpr` handle of an expression and exposes
  # introspection helpers that delegate to its `meta_*` methods.
  class MetaExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    #
    # @param expr [Object]
    #   Any object responding to `_rbexpr`; its handle is stored as-is.
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Equal.
    #
    # Delegates to `meta_eq` on the wrapped handle. Objects that do not expose
    # an `_rbexpr` handle (e.g. `nil`, strings) are never equal, so the
    # comparison returns `false` for them instead of raising `NoMethodError`.
    #
    # @param other [Object]
    #   Object to compare with; expected to respond to `_rbexpr`.
    #
    # @return [Boolean]
    def ==(other)
      return false unless other.respond_to?(:_rbexpr)

      _rbexpr.meta_eq(other._rbexpr)
    end

    # Not equal.
    #
    # Exact negation of {#==}.
    #
    # @param other [Object]
    #   Object to compare with.
    #
    # @return [Boolean]
    def !=(other)
      !(self == other)
    end

    # Pop the latest expression and return the input(s) of the popped expression.
    #
    # Each element returned by `meta_pop` is passed through `Utils.wrap_expr`.
    #
    # @return [Array]
    def pop
      _rbexpr.meta_pop.map { |e| Utils.wrap_expr(e) }
    end

    # Get a list with the root column name.
    #
    # @return [Array]
    def root_names
      _rbexpr.meta_roots
    end

    # Get the column name that this expression would produce.
    #
    # @return [String]
    def output_name
      _rbexpr.meta_output_name
    end

    # Undo any renaming operation like `alias` or `keep_name`.
    #
    # @return [Expr]
    def undo_aliases
      Utils.wrap_expr(_rbexpr.meta_undo_aliases)
    end
  end
end
@@ -0,0 +1,35 @@
module Polars
  # A rolling grouper.
  #
  # Exposes an `.agg` method, which lets you run all polars expressions in a
  # groupby context.
  class RollingGroupBy
    # Store the frame and the rolling-window settings for a later {#agg} call.
    #
    # `period` and `offset` are converted with
    # `Utils._timedelta_to_pl_duration` before being stored; every other
    # argument is kept untouched.
    #
    # @param df [Object]
    #   Frame to aggregate; must respond to `lazy`.
    # @param index_column [Object]
    #   Passed on as `index_column:` to `groupby_rolling`.
    # @param period [Object]
    #   Window length, before conversion.
    # @param offset [Object]
    #   Window offset, before conversion.
    # @param closed [Object]
    #   Passed on as `closed:` to `groupby_rolling`.
    # @param by [Object]
    #   Passed on as `by:` to `groupby_rolling`.
    def initialize(df, index_column, period, offset, closed, by)
      @period = Utils._timedelta_to_pl_duration(period)
      @offset = Utils._timedelta_to_pl_duration(offset)
      @df = df
      @time_column = index_column
      @closed = closed
      @by = by
    end

    # Run the given aggregations over the rolling groups.
    #
    # Builds the query on the lazy version of the stored frame and collects it
    # with `no_optimization: true` and `string_cache: false`.
    #
    # @param aggs [Object]
    #   Aggregation expression(s), handed straight to the lazy group-by's `agg`.
    #
    # @return [Object]
    #   Whatever `collect` returns (presumably a DataFrame -- confirm against
    #   LazyFrame#collect).
    def agg(aggs)
      window_options = {
        index_column: @time_column,
        period: @period,
        offset: @offset,
        closed: @closed,
        by: @by
      }

      rolling_groups = @df.lazy.groupby_rolling(**window_options)
      rolling_groups.agg(aggs).collect(no_optimization: true, string_cache: false)
    end
  end
end