polars-df 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +139 -6
- data/ext/polars/src/dataframe.rs +360 -15
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +135 -3
- data/ext/polars/src/lazy/dsl.rs +97 -2
- data/ext/polars/src/lazy/meta.rs +1 -1
- data/ext/polars/src/lazy/mod.rs +1 -0
- data/ext/polars/src/lib.rs +227 -12
- data/ext/polars/src/series.rs +190 -38
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +96 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2813 -100
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +631 -11
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +763 -4
- data/lib/polars/lazy_frame.rb +1415 -67
- data/lib/polars/lazy_functions.rb +430 -9
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2244 -192
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +76 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +8 -2
- metadata +12 -2
data/lib/polars/series.rb
CHANGED
@@ -1,7 +1,40 @@
|
|
1
1
|
module Polars
|
2
|
+
# A Series represents a single column in a polars DataFrame.
|
2
3
|
class Series
|
4
|
+
include ExprDispatch
|
5
|
+
|
6
|
+
# @private
|
3
7
|
attr_accessor :_s
|
4
8
|
|
9
|
+
# Create a new Series.
|
10
|
+
#
|
11
|
+
# @param name [String, Array, nil]
|
12
|
+
# Name of the series. Will be used as a column name when used in a DataFrame.
|
13
|
+
# When not specified, name is set to an empty string.
|
14
|
+
# @param values [Array, nil]
|
15
|
+
# One-dimensional data in various forms. Supported are: Array and Series.
|
16
|
+
# @param dtype [Symbol, nil]
|
17
|
+
# Polars dtype of the Series data. If not specified, the dtype is inferred.
|
18
|
+
# @param strict [Boolean]
|
19
|
+
# Throw error on numeric overflow.
|
20
|
+
# @param nan_to_null [Boolean]
|
21
|
+
# Not used.
|
22
|
+
# @param dtype_if_empty [Symbol, nil]
|
23
|
+
# If no dtype is specified and values contains `nil` or an empty array,
|
24
|
+
# set the Polars dtype of the Series data. If not specified, Float32 is used.
|
25
|
+
#
|
26
|
+
# @example Constructing a Series by specifying name and values positionally:
|
27
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
28
|
+
#
|
29
|
+
# @example Notice that the dtype is automatically inferred as a polars Int64:
|
30
|
+
# s.dtype
|
31
|
+
# # => :i64
|
32
|
+
#
|
33
|
+
# @example Constructing a Series with a specific dtype:
|
34
|
+
# s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
|
35
|
+
#
|
36
|
+
# @example It is possible to construct a Series with values as the first positional argument. This syntax is considered an anti-pattern, but it can be useful in certain scenarios. You must specify any other arguments through keywords.
|
37
|
+
# s3 = Polars::Series.new([1, 2, 3])
|
5
38
|
def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
|
6
39
|
# Handle case where values are passed as the first argument
|
7
40
|
if !name.nil? && !name.is_a?(String)
|
@@ -17,6 +50,8 @@ module Polars
|
|
17
50
|
|
18
51
|
if values.nil?
|
19
52
|
self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
|
53
|
+
elsif values.is_a?(Series)
|
54
|
+
self._s = series_to_rbseries(name, values)
|
20
55
|
elsif values.is_a?(Range)
|
21
56
|
self._s =
|
22
57
|
Polars.arange(
|
@@ -35,16 +70,23 @@ module Polars
|
|
35
70
|
end
|
36
71
|
end
|
37
72
|
|
73
|
+
# @private
|
38
74
|
def self._from_rbseries(s)
|
39
75
|
series = Series.allocate
|
40
76
|
series._s = s
|
41
77
|
series
|
42
78
|
end
|
43
79
|
|
80
|
+
# Get the data type of this Series.
|
81
|
+
#
|
82
|
+
# @return [Symbol]
|
44
83
|
def dtype
|
45
|
-
_s.dtype
|
84
|
+
_s.dtype
|
46
85
|
end
|
47
86
|
|
87
|
+
# Get flags that are set on the Series.
|
88
|
+
#
|
89
|
+
# @return [Hash]
|
48
90
|
def flags
|
49
91
|
{
|
50
92
|
"SORTED_ASC" => _s.is_sorted_flag,
|
@@ -52,154 +94,491 @@ module Polars
|
|
52
94
|
}
|
53
95
|
end
|
54
96
|
|
97
|
+
# Get the inner dtype of a List typed Series.
|
98
|
+
#
|
99
|
+
# @return [Symbol]
|
55
100
|
def inner_dtype
|
56
|
-
_s.inner_dtype
|
101
|
+
_s.inner_dtype
|
57
102
|
end
|
58
103
|
|
104
|
+
# Get the name of this Series.
|
105
|
+
#
|
106
|
+
# @return [String]
|
59
107
|
def name
|
60
108
|
_s.name
|
61
109
|
end
|
62
110
|
|
111
|
+
# Shape of this Series.
|
112
|
+
#
|
113
|
+
# @return [Array]
|
63
114
|
def shape
|
64
115
|
[_s.len]
|
65
116
|
end
|
66
117
|
|
67
|
-
#
|
68
|
-
#
|
118
|
+
# Get the time unit of underlying Datetime Series as `"ns"`, `"us"`, or `"ms"`.
|
119
|
+
#
|
120
|
+
# @return [String]
|
121
|
+
def time_unit
|
122
|
+
_s.time_unit
|
123
|
+
end
|
69
124
|
|
125
|
+
# Returns a string representing the Series.
|
126
|
+
#
|
127
|
+
# @return [String]
|
70
128
|
def to_s
|
71
129
|
_s.to_s
|
72
130
|
end
|
73
131
|
alias_method :inspect, :to_s
|
74
132
|
|
133
|
+
# Bitwise AND.
|
134
|
+
#
|
135
|
+
# @return [Series]
|
75
136
|
def &(other)
|
137
|
+
if !other.is_a?(Series)
|
138
|
+
other = Series.new([other])
|
139
|
+
end
|
76
140
|
Utils.wrap_s(_s.bitand(other._s))
|
77
141
|
end
|
78
142
|
|
143
|
+
# Bitwise OR.
|
144
|
+
#
|
145
|
+
# @return [Series]
|
79
146
|
def |(other)
|
147
|
+
if !other.is_a?(Series)
|
148
|
+
other = Series.new([other])
|
149
|
+
end
|
80
150
|
Utils.wrap_s(_s.bitor(other._s))
|
81
151
|
end
|
82
152
|
|
153
|
+
# Bitwise XOR.
|
154
|
+
#
|
155
|
+
# @return [Series]
|
83
156
|
def ^(other)
|
157
|
+
if !other.is_a?(Series)
|
158
|
+
other = Series.new([other])
|
159
|
+
end
|
84
160
|
Utils.wrap_s(_s.bitxor(other._s))
|
85
161
|
end
|
86
162
|
|
87
|
-
#
|
88
|
-
#
|
163
|
+
# Equal.
|
164
|
+
#
|
165
|
+
# @return [Series]
|
166
|
+
def ==(other)
|
167
|
+
_comp(other, :eq)
|
168
|
+
end
|
89
169
|
|
90
|
-
#
|
91
|
-
#
|
170
|
+
# Not equal.
|
171
|
+
#
|
172
|
+
# @return [Series]
|
173
|
+
def !=(other)
|
174
|
+
_comp(other, :neq)
|
175
|
+
end
|
92
176
|
|
93
|
-
#
|
94
|
-
#
|
177
|
+
# Greater than.
|
178
|
+
#
|
179
|
+
# @return [Series]
|
180
|
+
def >(other)
|
181
|
+
_comp(other, :gt)
|
182
|
+
end
|
95
183
|
|
96
|
-
#
|
97
|
-
#
|
184
|
+
# Less than.
|
185
|
+
#
|
186
|
+
# @return [Series]
|
187
|
+
def <(other)
|
188
|
+
_comp(other, :lt)
|
189
|
+
end
|
98
190
|
|
99
|
-
#
|
100
|
-
#
|
191
|
+
# Greater than or equal.
|
192
|
+
#
|
193
|
+
# @return [Series]
|
194
|
+
def >=(other)
|
195
|
+
_comp(other, :gt_eq)
|
196
|
+
end
|
101
197
|
|
102
|
-
#
|
103
|
-
#
|
198
|
+
# Less than or equal.
|
199
|
+
#
|
200
|
+
# @return [Series]
|
201
|
+
def <=(other)
|
202
|
+
_comp(other, :lt_eq)
|
203
|
+
end
|
104
204
|
|
205
|
+
# Performs addition.
|
206
|
+
#
|
207
|
+
# @return [Series]
|
105
208
|
def +(other)
|
106
|
-
|
209
|
+
_arithmetic(other, :add)
|
107
210
|
end
|
108
211
|
|
212
|
+
# Performs subtraction.
|
213
|
+
#
|
214
|
+
# @return [Series]
|
109
215
|
def -(other)
|
110
|
-
|
216
|
+
_arithmetic(other, :sub)
|
111
217
|
end
|
112
218
|
|
219
|
+
# Performs multiplication.
|
220
|
+
#
|
221
|
+
# @return [Series]
|
113
222
|
def *(other)
|
114
|
-
|
223
|
+
_arithmetic(other, :mul)
|
115
224
|
end
|
116
225
|
|
226
|
+
# Performs division.
|
227
|
+
#
|
228
|
+
# @return [Series]
|
117
229
|
def /(other)
|
118
|
-
|
230
|
+
_arithmetic(other, :div)
|
119
231
|
end
|
120
232
|
|
233
|
+
# Returns the modulo.
|
234
|
+
#
|
235
|
+
# @return [Series]
|
236
|
+
def %(other)
|
237
|
+
if is_datelike
|
238
|
+
raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes"
|
239
|
+
end
|
240
|
+
_arithmetic(other, :rem)
|
241
|
+
end
|
242
|
+
|
243
|
+
# Raises to the power of exponent.
|
244
|
+
#
|
245
|
+
# @return [Series]
|
121
246
|
def **(power)
|
122
|
-
|
123
|
-
|
124
|
-
|
247
|
+
if is_datelike
|
248
|
+
raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
|
249
|
+
end
|
125
250
|
to_frame.select(Polars.col(name).pow(power)).to_series
|
126
251
|
end
|
127
252
|
|
128
|
-
#
|
129
|
-
#
|
253
|
+
# Performs negation.
|
254
|
+
#
|
255
|
+
# @return [Series]
|
256
|
+
def -@
|
257
|
+
0 - self
|
258
|
+
end
|
130
259
|
|
260
|
+
# Returns elements of the Series.
|
261
|
+
#
|
262
|
+
# @return [Object]
|
131
263
|
def [](item)
|
132
|
-
|
264
|
+
if item.is_a?(Integer)
|
265
|
+
return _s.get_idx(item)
|
266
|
+
end
|
267
|
+
|
268
|
+
if item.is_a?(Range)
|
269
|
+
return Slice.new(self).apply(item)
|
270
|
+
end
|
271
|
+
|
272
|
+
raise ArgumentError, "Cannot get item of type: #{item.class.name}"
|
133
273
|
end
|
134
274
|
|
135
|
-
#
|
136
|
-
#
|
275
|
+
# Sets an element of the Series.
|
276
|
+
#
|
277
|
+
# @return [Object]
|
278
|
+
def []=(key, value)
|
279
|
+
if value.is_a?(Array)
|
280
|
+
if is_numeric || is_datelike
|
281
|
+
set_at_idx(key, value)
|
282
|
+
return
|
283
|
+
end
|
284
|
+
raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
|
285
|
+
end
|
286
|
+
|
287
|
+
if key.is_a?(Series)
|
288
|
+
if key.dtype == :bool
|
289
|
+
self._s = set(key, value)._s
|
290
|
+
elsif key.dtype == :u64
|
291
|
+
self._s = set_at_idx(key.cast(:u32), value)._s
|
292
|
+
elsif key.dtype == :u32
|
293
|
+
self._s = set_at_idx(key, value)._s
|
294
|
+
else
|
295
|
+
raise Todo
|
296
|
+
end
|
297
|
+
end
|
137
298
|
|
299
|
+
if key.is_a?(Array)
|
300
|
+
s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32))
|
301
|
+
self[s] = value
|
302
|
+
elsif key.is_a?(Integer)
|
303
|
+
# TODO fix
|
304
|
+
# self[[key]] = value
|
305
|
+
set_at_idx(key, value)
|
306
|
+
else
|
307
|
+
raise ArgumentError, "cannot use #{key} for indexing"
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
# Return an estimation of the total (heap) allocated size of the Series.
|
312
|
+
#
|
313
|
+
# Estimated size is given in the specified unit (bytes by default).
|
314
|
+
#
|
315
|
+
# This estimation is the sum of the size of its buffers, validity, including
|
316
|
+
# nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
|
317
|
+
# size of 2 arrays is not the sum of the sizes computed from this function. In
|
318
|
+
# particular, StructArray's size is an upper bound.
|
319
|
+
#
|
320
|
+
# When an array is sliced, its allocated size remains constant because the buffer
|
321
|
+
# is unchanged. However, this function will yield a smaller number. This is because
|
322
|
+
# this function returns the visible size of the buffer, not its total capacity.
|
323
|
+
#
|
324
|
+
# FFI buffers are included in this estimation.
|
325
|
+
#
|
326
|
+
# @param unit ["b", "kb", "mb", "gb", "tb"]
|
327
|
+
# Scale the returned size to the given unit.
|
328
|
+
#
|
329
|
+
# @return [Numeric]
|
330
|
+
#
|
331
|
+
# @example
|
332
|
+
# s = Polars::Series.new("values", 1..1_000_000, dtype: :u32)
|
333
|
+
# s.estimated_size
|
334
|
+
# # => 4000000
|
335
|
+
# s.estimated_size("mb")
|
336
|
+
# # => 3.814697265625
|
138
337
|
def estimated_size(unit = "b")
|
139
338
|
sz = _s.estimated_size
|
140
339
|
Utils.scale_bytes(sz, to: unit)
|
141
340
|
end
|
142
341
|
|
342
|
+
# Compute the square root of the elements.
|
343
|
+
#
|
344
|
+
# @return [Series]
|
143
345
|
def sqrt
|
144
|
-
self
|
346
|
+
self**0.5
|
145
347
|
end
|
146
348
|
|
349
|
+
# Check if any boolean value in the column is `true`.
|
350
|
+
#
|
351
|
+
# @return [Boolean]
|
147
352
|
def any
|
148
353
|
to_frame.select(Polars.col(name).any).to_series[0]
|
149
354
|
end
|
150
355
|
|
356
|
+
# Check if all boolean values in the column are `true`.
|
357
|
+
#
|
358
|
+
# @return [Boolean]
|
151
359
|
def all
|
152
360
|
to_frame.select(Polars.col(name).all).to_series[0]
|
153
361
|
end
|
154
362
|
|
155
|
-
#
|
156
|
-
#
|
363
|
+
# Compute the logarithm to a given base.
|
364
|
+
#
|
365
|
+
# @param base [Float]
|
366
|
+
# Given base, defaults to `Math::E`.
|
367
|
+
#
|
368
|
+
# @return [Series]
|
369
|
+
def log(base = Math::E)
|
370
|
+
super
|
371
|
+
end
|
157
372
|
|
158
|
-
#
|
159
|
-
#
|
373
|
+
# Compute the base 10 logarithm of the input array, element-wise.
|
374
|
+
#
|
375
|
+
# @return [Series]
|
376
|
+
def log10
|
377
|
+
super
|
378
|
+
end
|
160
379
|
|
161
|
-
#
|
162
|
-
#
|
380
|
+
# Compute the exponential, element-wise.
|
381
|
+
#
|
382
|
+
# @return [Series]
|
383
|
+
def exp
|
384
|
+
super
|
385
|
+
end
|
163
386
|
|
164
|
-
#
|
165
|
-
#
|
387
|
+
# Create a new Series that copies data from this Series without null values.
|
388
|
+
#
|
389
|
+
# @return [Series]
|
390
|
+
def drop_nulls
|
391
|
+
super
|
392
|
+
end
|
166
393
|
|
167
|
-
#
|
168
|
-
#
|
394
|
+
# Drop NaN values.
|
395
|
+
#
|
396
|
+
# @return [Series]
|
397
|
+
def drop_nans
|
398
|
+
super
|
399
|
+
end
|
169
400
|
|
401
|
+
# Cast this Series to a DataFrame.
|
402
|
+
#
|
403
|
+
# @return [DataFrame]
|
170
404
|
def to_frame
|
171
405
|
Utils.wrap_df(RbDataFrame.new([_s]))
|
172
406
|
end
|
173
407
|
|
174
|
-
#
|
175
|
-
#
|
408
|
+
# Quick summary statistics of a series.
|
409
|
+
#
|
410
|
+
# Series with mixed datatypes will return summary statistics for the datatype of
|
411
|
+
# the first value.
|
412
|
+
#
|
413
|
+
# @return [DataFrame]
|
414
|
+
#
|
415
|
+
# @example
|
416
|
+
# series_num = Polars::Series.new([1, 2, 3, 4, 5])
|
417
|
+
# series_num.describe
|
418
|
+
# # =>
|
419
|
+
# # shape: (6, 2)
|
420
|
+
# # ┌────────────┬──────────┐
|
421
|
+
# # │ statistic ┆ value │
|
422
|
+
# # │ --- ┆ --- │
|
423
|
+
# # │ str ┆ f64 │
|
424
|
+
# # ╞════════════╪══════════╡
|
425
|
+
# # │ min ┆ 1.0 │
|
426
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
427
|
+
# # │ max ┆ 5.0 │
|
428
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
429
|
+
# # │ null_count ┆ 0.0 │
|
430
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
431
|
+
# # │ mean ┆ 3.0 │
|
432
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
433
|
+
# # │ std ┆ 1.581139 │
|
434
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
435
|
+
# # │ count ┆ 5.0 │
|
436
|
+
# # └────────────┴──────────┘
|
437
|
+
#
|
438
|
+
# @example
|
439
|
+
# series_str = Polars::Series.new(["a", "a", nil, "b", "c"])
|
440
|
+
# series_str.describe
|
441
|
+
# # =>
|
442
|
+
# # shape: (3, 2)
|
443
|
+
# # ┌────────────┬───────┐
|
444
|
+
# # │ statistic ┆ value │
|
445
|
+
# # │ --- ┆ --- │
|
446
|
+
# # │ str ┆ i64 │
|
447
|
+
# # ╞════════════╪═══════╡
|
448
|
+
# # │ unique ┆ 4 │
|
449
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
450
|
+
# # │ null_count ┆ 1 │
|
451
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
452
|
+
# # │ count ┆ 5 │
|
453
|
+
# # └────────────┴───────┘
|
454
|
+
def describe
|
455
|
+
if len == 0
|
456
|
+
raise ArgumentError, "Series must contain at least one value"
|
457
|
+
elsif is_numeric
|
458
|
+
s = cast(:f64)
|
459
|
+
stats = {
|
460
|
+
"min" => s.min,
|
461
|
+
"max" => s.max,
|
462
|
+
"null_count" => s.null_count,
|
463
|
+
"mean" => s.mean,
|
464
|
+
"std" => s.std,
|
465
|
+
"count" => s.len
|
466
|
+
}
|
467
|
+
elsif is_boolean
|
468
|
+
stats = {
|
469
|
+
"sum" => sum,
|
470
|
+
"null_count" => null_count,
|
471
|
+
"count" => len
|
472
|
+
}
|
473
|
+
elsif is_utf8
|
474
|
+
stats = {
|
475
|
+
"unique" => unique.length,
|
476
|
+
"null_count" => null_count,
|
477
|
+
"count" => len
|
478
|
+
}
|
479
|
+
elsif is_datelike
|
480
|
+
# we coerce all to string, because a polars column
|
481
|
+
# only has a single dtype and dates: datetime and count: int don't match
|
482
|
+
stats = {
|
483
|
+
"min" => dt.min.to_s,
|
484
|
+
"max" => dt.max.to_s,
|
485
|
+
"null_count" => null_count.to_s,
|
486
|
+
"count" => len.to_s
|
487
|
+
}
|
488
|
+
else
|
489
|
+
raise TypeError, "This type is not supported"
|
490
|
+
end
|
176
491
|
|
492
|
+
Polars::DataFrame.new(
|
493
|
+
{"statistic" => stats.keys, "value" => stats.values}
|
494
|
+
)
|
495
|
+
end
|
496
|
+
|
497
|
+
# Reduce this Series to the sum value.
|
498
|
+
#
|
499
|
+
# @return [Numeric]
|
500
|
+
#
|
501
|
+
# @note
|
502
|
+
# Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
503
|
+
# `:i64` before summing to prevent overflow issues.
|
504
|
+
#
|
505
|
+
# @example
|
506
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
507
|
+
# s.sum
|
508
|
+
# # => 6
|
177
509
|
def sum
|
178
510
|
_s.sum
|
179
511
|
end
|
180
512
|
|
513
|
+
# Reduce this Series to the mean value.
|
514
|
+
#
|
515
|
+
# @return [Float, nil]
|
516
|
+
#
|
517
|
+
# @example
|
518
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
519
|
+
# s.mean
|
520
|
+
# # => 2.0
|
181
521
|
def mean
|
182
522
|
_s.mean
|
183
523
|
end
|
184
524
|
|
525
|
+
# Reduce this Series to the product value.
|
526
|
+
#
|
527
|
+
# @return [Numeric]
|
185
528
|
def product
|
186
529
|
to_frame.select(Polars.col(name).product).to_series[0]
|
187
530
|
end
|
188
531
|
|
532
|
+
# Get the minimal value in this Series.
|
533
|
+
#
|
534
|
+
# @return [Object]
|
535
|
+
#
|
536
|
+
# @example
|
537
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
538
|
+
# s.min
|
539
|
+
# # => 1
|
189
540
|
def min
|
190
541
|
_s.min
|
191
542
|
end
|
192
543
|
|
544
|
+
# Get the maximum value in this Series.
|
545
|
+
#
|
546
|
+
# @return [Object]
|
547
|
+
#
|
548
|
+
# @example
|
549
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
550
|
+
# s.max
|
551
|
+
# # => 3
|
193
552
|
def max
|
194
553
|
_s.max
|
195
554
|
end
|
196
555
|
|
197
|
-
#
|
198
|
-
#
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
556
|
+
# Get maximum value, but propagate/poison encountered NaN values.
|
557
|
+
#
|
558
|
+
# @return [Object]
|
559
|
+
def nan_max
|
560
|
+
to_frame.select(Polars.col(name).nan_max)[0, 0]
|
561
|
+
end
|
562
|
+
|
563
|
+
# Get minimum value, but propagate/poison encountered NaN values.
|
564
|
+
#
|
565
|
+
# @return [Object]
|
566
|
+
def nan_min
|
567
|
+
to_frame.select(Polars.col(name).nan_min)[0, 0]
|
568
|
+
end
|
569
|
+
|
570
|
+
# Get the standard deviation of this Series.
|
571
|
+
#
|
572
|
+
# @param ddof [Integer]
|
573
|
+
# “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
|
574
|
+
# where N represents the number of elements.
|
575
|
+
#
|
576
|
+
# @return [Float, nil]
|
577
|
+
#
|
578
|
+
# @example
|
579
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
580
|
+
# s.std
|
581
|
+
# # => 1.0
|
203
582
|
def std(ddof: 1)
|
204
583
|
if !is_numeric
|
205
584
|
nil
|
@@ -208,6 +587,18 @@ module Polars
|
|
208
587
|
end
|
209
588
|
end
|
210
589
|
|
590
|
+
# Get variance of this Series.
|
591
|
+
#
|
592
|
+
# @param ddof [Integer]
|
593
|
+
# “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
|
594
|
+
# where N represents the number of elements.
|
595
|
+
#
|
596
|
+
# @return [Float, nil]
|
597
|
+
#
|
598
|
+
# @example
|
599
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
600
|
+
# s.var
|
601
|
+
# # => 1.0
|
211
602
|
def var(ddof: 1)
|
212
603
|
if !is_numeric
|
213
604
|
nil
|
@@ -216,37 +607,160 @@ module Polars
|
|
216
607
|
end
|
217
608
|
end
|
218
609
|
|
610
|
+
# Get the median of this Series.
|
611
|
+
#
|
612
|
+
# @return [Float, nil]
|
613
|
+
#
|
614
|
+
# @example
|
615
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
616
|
+
# s.median
|
617
|
+
# # => 2.0
|
219
618
|
def median
|
220
619
|
_s.median
|
221
620
|
end
|
222
621
|
|
622
|
+
# Get the quantile value of this Series.
|
623
|
+
#
|
624
|
+
# @param quantile [Float, nil]
|
625
|
+
# Quantile between 0.0 and 1.0.
|
626
|
+
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
|
627
|
+
# Interpolation method.
|
628
|
+
#
|
629
|
+
# @return [Float, nil]
|
630
|
+
#
|
631
|
+
# @example
|
632
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
633
|
+
# s.quantile(0.5)
|
634
|
+
# # => 2.0
|
223
635
|
def quantile(quantile, interpolation: "nearest")
|
224
636
|
_s.quantile(quantile, interpolation)
|
225
637
|
end
|
226
638
|
|
639
|
+
# Get dummy variables.
|
640
|
+
#
|
641
|
+
# @return [DataFrame]
|
642
|
+
#
|
643
|
+
# @example
|
644
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
645
|
+
# s.to_dummies
|
646
|
+
# # =>
|
647
|
+
# # shape: (3, 3)
|
648
|
+
# # ┌─────┬─────┬─────┐
|
649
|
+
# # │ a_1 ┆ a_2 ┆ a_3 │
|
650
|
+
# # │ --- ┆ --- ┆ --- │
|
651
|
+
# # │ u8 ┆ u8 ┆ u8 │
|
652
|
+
# # ╞═════╪═════╪═════╡
|
653
|
+
# # │ 1 ┆ 0 ┆ 0 │
|
654
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
655
|
+
# # │ 0 ┆ 1 ┆ 0 │
|
656
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
|
657
|
+
# # │ 0 ┆ 0 ┆ 1 │
|
658
|
+
# # └─────┴─────┴─────┘
|
227
659
|
def to_dummies
|
228
660
|
Utils.wrap_df(_s.to_dummies)
|
229
661
|
end
|
230
662
|
|
663
|
+
# Count the unique values in a Series.
|
664
|
+
#
|
665
|
+
# @param sort [Boolean]
|
666
|
+
# Ensure the output is sorted from most values to least.
|
667
|
+
#
|
668
|
+
# @return [DataFrame]
|
669
|
+
#
|
670
|
+
# @example
|
671
|
+
# s = Polars::Series.new("a", [1, 2, 2, 3])
|
672
|
+
# s.value_counts.sort("a")
|
673
|
+
# # =>
|
674
|
+
# # shape: (3, 2)
|
675
|
+
# # ┌─────┬────────┐
|
676
|
+
# # │ a ┆ counts │
|
677
|
+
# # │ --- ┆ --- │
|
678
|
+
# # │ i64 ┆ u32 │
|
679
|
+
# # ╞═════╪════════╡
|
680
|
+
# # │ 1 ┆ 1 │
|
681
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
|
682
|
+
# # │ 2 ┆ 2 │
|
683
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
|
684
|
+
# # │ 3 ┆ 1 │
|
685
|
+
# # └─────┴────────┘
|
231
686
|
def value_counts(sort: false)
|
232
687
|
Utils.wrap_df(_s.value_counts(sort))
|
233
688
|
end
|
234
689
|
|
235
|
-
#
|
236
|
-
#
|
690
|
+
# Return a count of the unique values in the order of appearance.
|
691
|
+
#
|
692
|
+
# @return [Series]
|
693
|
+
#
|
694
|
+
# @example
|
695
|
+
# s = Polars::Series.new("id", ["a", "b", "b", "c", "c", "c"])
|
696
|
+
# s.unique_counts
|
697
|
+
# # =>
|
698
|
+
# # shape: (3,)
|
699
|
+
# # Series: 'id' [u32]
|
700
|
+
# # [
|
701
|
+
# # 1
|
702
|
+
# # 2
|
703
|
+
# # 3
|
704
|
+
# # ]
|
705
|
+
def unique_counts
|
706
|
+
super
|
707
|
+
end
|
237
708
|
|
238
|
-
#
|
239
|
-
#
|
709
|
+
# Computes the entropy.
|
710
|
+
#
|
711
|
+
# Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
|
712
|
+
#
|
713
|
+
# @param base [Float]
|
714
|
+
# Given base, defaults to `e`
|
715
|
+
# @param normalize [Boolean]
|
716
|
+
# Normalize pk if it doesn't sum to 1.
|
717
|
+
#
|
718
|
+
# @return [Float, nil]
|
719
|
+
#
|
720
|
+
# @example
|
721
|
+
# a = Polars::Series.new([0.99, 0.005, 0.005])
|
722
|
+
# a.entropy(normalize: true)
|
723
|
+
# # => 0.06293300616044681
|
724
|
+
#
|
725
|
+
# @example
|
726
|
+
# b = Polars::Series.new([0.65, 0.10, 0.25])
|
727
|
+
# b.entropy(normalize: true)
|
728
|
+
# # => 0.8568409950394724
|
729
|
+
def entropy(base: Math::E, normalize: false)
|
730
|
+
Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
|
731
|
+
end
|
240
732
|
|
241
733
|
# def cumulative_eval
|
242
734
|
# end
|
243
735
|
|
736
|
+
# Return a copy of the Series with a new alias/name.
|
737
|
+
#
|
738
|
+
# @param name [String]
|
739
|
+
# New name.
|
740
|
+
#
|
741
|
+
# @return [Series]
|
742
|
+
#
|
743
|
+
# @example
|
744
|
+
# s = Polars::Series.new("x", [1, 2, 3])
|
745
|
+
# s.alias("y")
|
244
746
|
def alias(name)
|
245
747
|
s = dup
|
246
748
|
s._s.rename(name)
|
247
749
|
s
|
248
750
|
end
|
249
751
|
|
752
|
+
# Rename this Series.
|
753
|
+
#
|
754
|
+
# @param name [String]
|
755
|
+
# New name.
|
756
|
+
# @param in_place [Boolean]
|
757
|
+
# Modify the Series in-place.
|
758
|
+
#
|
759
|
+
# @return [Series]
|
760
|
+
#
|
761
|
+
# @example
|
762
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
763
|
+
# s.rename("b")
|
250
764
|
def rename(name, in_place: false)
|
251
765
|
if in_place
|
252
766
|
_s.rename(name)
|
@@ -256,59 +770,365 @@ module Polars
|
|
256
770
|
end
|
257
771
|
end
|
258
772
|
|
773
|
+
# Get the length of each individual chunk.
|
774
|
+
#
|
775
|
+
# @return [Array]
|
776
|
+
#
|
777
|
+
# @example
|
778
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
779
|
+
# s2 = Polars::Series.new("b", [4, 5, 6])
|
780
|
+
#
|
781
|
+
# @example Concatenate Series with rechunk: true
|
782
|
+
# Polars.concat([s, s2]).chunk_lengths
|
783
|
+
# # => [6]
|
784
|
+
#
|
785
|
+
# @example Concatenate Series with rechunk: false
|
786
|
+
# Polars.concat([s, s2], rechunk: false).chunk_lengths
|
787
|
+
# # => [3, 3]
|
259
788
|
def chunk_lengths
|
260
789
|
_s.chunk_lengths
|
261
790
|
end
|
262
791
|
|
792
|
+
# Get the number of chunks that this Series contains.
|
793
|
+
#
|
794
|
+
# @return [Integer]
|
795
|
+
#
|
796
|
+
# @example
|
797
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
798
|
+
# s2 = Polars::Series.new("b", [4, 5, 6])
|
799
|
+
#
|
800
|
+
# @example Concatenate Series with rechunk: true
|
801
|
+
# Polars.concat([s, s2]).n_chunks
|
802
|
+
# # => 1
|
803
|
+
#
|
804
|
+
# @example Concatenate Series with rechunk: false
|
805
|
+
# Polars.concat([s, s2], rechunk: false).n_chunks
|
806
|
+
# # => 2
|
263
807
|
def n_chunks
|
264
808
|
_s.n_chunks
|
265
809
|
end
|
266
810
|
|
811
|
+
# Get an array with the cumulative sum computed at every element.
|
812
|
+
#
|
813
|
+
# @param reverse [Boolean]
|
814
|
+
# reverse the operation.
|
815
|
+
#
|
816
|
+
# @return [Series]
|
817
|
+
#
|
818
|
+
# @note
|
819
|
+
# Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
820
|
+
# `:i64` before summing to prevent overflow issues.
|
821
|
+
#
|
822
|
+
# @example
|
823
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
824
|
+
# s.cumsum
|
825
|
+
# # =>
|
826
|
+
# # shape: (3,)
|
827
|
+
# # Series: 'a' [i64]
|
828
|
+
# # [
|
829
|
+
# # 1
|
830
|
+
# # 3
|
831
|
+
# # 6
|
832
|
+
# # ]
|
267
833
|
def cumsum(reverse: false)
|
268
|
-
|
834
|
+
super
|
269
835
|
end
|
270
836
|
|
837
|
+
# Get an array with the cumulative min computed at every element.
|
838
|
+
#
|
839
|
+
# @param reverse [Boolean]
|
840
|
+
# reverse the operation.
|
841
|
+
#
|
842
|
+
# @return [Series]
|
843
|
+
#
|
844
|
+
# @example
|
845
|
+
# s = Polars::Series.new("a", [3, 5, 1])
|
846
|
+
# s.cummin
|
847
|
+
# # =>
|
848
|
+
# # shape: (3,)
|
849
|
+
# # Series: 'a' [i64]
|
850
|
+
# # [
|
851
|
+
# # 3
|
852
|
+
# # 3
|
853
|
+
# # 1
|
854
|
+
# # ]
|
271
855
|
def cummin(reverse: false)
|
272
|
-
|
856
|
+
super
|
273
857
|
end
|
274
858
|
|
859
|
+
# Get an array with the cumulative max computed at every element.
|
860
|
+
#
|
861
|
+
# @param reverse [Boolean]
|
862
|
+
# reverse the operation.
|
863
|
+
#
|
864
|
+
# @return [Series]
|
865
|
+
#
|
866
|
+
# @example
|
867
|
+
# s = Polars::Series.new("a", [3, 5, 1])
|
868
|
+
# s.cummax
|
869
|
+
# # =>
|
870
|
+
# # shape: (3,)
|
871
|
+
# # Series: 'a' [i64]
|
872
|
+
# # [
|
873
|
+
# # 3
|
874
|
+
# # 5
|
875
|
+
# # 5
|
876
|
+
# # ]
|
275
877
|
def cummax(reverse: false)
|
276
|
-
|
878
|
+
super
|
277
879
|
end
|
278
880
|
|
881
|
+
# Get an array with the cumulative product computed at every element.
|
882
|
+
#
|
883
|
+
# @param reverse [Boolean]
|
884
|
+
# reverse the operation.
|
885
|
+
#
|
886
|
+
# @return [Series]
|
887
|
+
#
|
888
|
+
# @note
|
889
|
+
# Dtypes `:i8`, `:u8`, `:i16`, and `:u16` are cast to
|
890
|
+
# `:i64` before multiplying to prevent overflow issues.
|
891
|
+
#
|
892
|
+
# @example
|
893
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
894
|
+
# s.cumprod
|
895
|
+
# # =>
|
896
|
+
# # shape: (3,)
|
897
|
+
# # Series: 'a' [i64]
|
898
|
+
# # [
|
899
|
+
# # 1
|
900
|
+
# # 2
|
901
|
+
# # 6
|
902
|
+
# # ]
|
279
903
|
def cumprod(reverse: false)
|
280
|
-
|
904
|
+
super
|
281
905
|
end
|
282
906
|
|
907
|
+
# Get the first `n` rows.
|
908
|
+
#
|
909
|
+
# Alias for {#head}.
|
910
|
+
#
|
911
|
+
# @param n [Integer]
|
912
|
+
# Number of rows to return.
|
913
|
+
#
|
914
|
+
# @return [Series]
|
915
|
+
#
|
916
|
+
# @example
|
917
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
918
|
+
# s.limit(2)
|
919
|
+
# # =>
|
920
|
+
# # shape: (2,)
|
921
|
+
# # Series: 'a' [i64]
|
922
|
+
# # [
|
923
|
+
# # 1
|
924
|
+
# # 2
|
925
|
+
# # ]
|
283
926
|
def limit(n = 10)
|
284
927
|
to_frame.select(Utils.col(name).limit(n)).to_series
|
285
928
|
end
|
286
929
|
|
930
|
+
# Get a slice of this Series.
|
931
|
+
#
|
932
|
+
# @param offset [Integer]
|
933
|
+
# Start index. Negative indexing is supported.
|
934
|
+
# @param length [Integer, nil]
|
935
|
+
# Length of the slice. If set to `nil`, all rows starting at the offset
|
936
|
+
# will be selected.
|
937
|
+
#
|
938
|
+
# @return [Series]
|
939
|
+
#
|
940
|
+
# @example
|
941
|
+
# s = Polars::Series.new("a", [1, 2, 3, 4])
|
942
|
+
# s.slice(1, 2)
|
943
|
+
# # =>
|
944
|
+
# # shape: (2,)
|
945
|
+
# # Series: 'a' [i64]
|
946
|
+
# # [
|
947
|
+
# # 2
|
948
|
+
# # 3
|
949
|
+
# # ]
|
287
950
|
def slice(offset, length = nil)
|
288
|
-
|
289
|
-
Utils.wrap_s(_s.slice(offset, length))
|
951
|
+
super
|
290
952
|
end
|
291
953
|
|
292
|
-
|
293
|
-
|
954
|
+
# Append a Series to this one.
|
955
|
+
#
|
956
|
+
# @param other [Series]
|
957
|
+
# Series to append.
|
958
|
+
# @param append_chunks [Boolean]
|
959
|
+
# If set to `true` the append operation will add the chunks from `other` to
|
960
|
+
# self. This is super cheap.
|
961
|
+
#
|
962
|
+
# If set to `false` the append operation will do the same as
|
963
|
+
# {DataFrame#extend} which extends the memory backed by this Series with
|
964
|
+
# the values from `other`.
|
965
|
+
#
|
966
|
+
# Different from `append_chunks`, `extend` appends the data from `other` to
|
967
|
+
# the underlying memory locations and thus may cause a reallocation (which is
|
968
|
+
# expensive).
|
969
|
+
#
|
970
|
+
# If this does not cause a reallocation, the resulting data structure will not
|
971
|
+
# have any extra chunks and thus will yield faster queries.
|
972
|
+
#
|
973
|
+
# Prefer `extend` over `append_chunks` when you want to do a query after a
|
974
|
+
# single append. For instance during online operations where you add `n` rows
|
975
|
+
# and rerun a query.
|
976
|
+
#
|
977
|
+
# Prefer `append_chunks` over `extend` when you want to append many times
|
978
|
+
# before doing a query. For instance, when you read in multiple files and want
|
979
|
+
# to store them in a single Series. In the latter case, finish the sequence
|
980
|
+
# of `append_chunks` operations with a `rechunk`.
|
981
|
+
#
|
982
|
+
# @return [Series]
|
983
|
+
#
|
984
|
+
# @example
|
985
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
986
|
+
# s2 = Polars::Series.new("b", [4, 5, 6])
|
987
|
+
# s.append(s2)
|
988
|
+
# # =>
|
989
|
+
# # shape: (6,)
|
990
|
+
# # Series: 'a' [i64]
|
991
|
+
# # [
|
992
|
+
# # 1
|
993
|
+
# # 2
|
994
|
+
# # 3
|
995
|
+
# # 4
|
996
|
+
# # 5
|
997
|
+
# # 6
|
998
|
+
# # ]
|
999
|
+
def append(other, append_chunks: true)
  begin
    if append_chunks
      # Cheap path: add the chunks of `other` to this Series.
      _s.append(other._s)
    else
      # Extend the memory backing this Series with the values of `other`.
      _s.extend(other._s)
    end
  rescue => e
    # Anything other than the borrow conflict is re-raised untouched.
    raise unless e.message == "Already mutably borrowed"

    # NOTE(review): presumably raised when `other` is backed by the same
    # native object as self (e.g. `s.append(s)`) - confirm. Retry on a copy.
    # `append_chunks` is keyword-only, so it must be forwarded as a keyword;
    # passing it positionally raises ArgumentError instead of retrying.
    append(other.clone, append_chunks: append_chunks)
  end
  self
end
|
296
1015
|
|
1016
|
+
# Filter elements by a boolean mask.
|
1017
|
+
#
|
1018
|
+
# @param predicate [Series, Array]
|
1019
|
+
# Boolean mask.
|
1020
|
+
#
|
1021
|
+
# @return [Series]
|
1022
|
+
#
|
1023
|
+
# @example
|
1024
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1025
|
+
# mask = Polars::Series.new("", [true, false, true])
|
1026
|
+
# s.filter(mask)
|
1027
|
+
# # =>
|
1028
|
+
# # shape: (2,)
|
1029
|
+
# # Series: 'a' [i64]
|
1030
|
+
# # [
|
1031
|
+
# # 1
|
1032
|
+
# # 3
|
1033
|
+
# # ]
|
297
1034
|
def filter(predicate)
|
1035
|
+
if predicate.is_a?(Array)
|
1036
|
+
predicate = Series.new("", predicate)
|
1037
|
+
end
|
298
1038
|
Utils.wrap_s(_s.filter(predicate._s))
|
299
1039
|
end
|
300
1040
|
|
1041
|
+
# Get the first `n` rows.
|
1042
|
+
#
|
1043
|
+
# @param n [Integer]
|
1044
|
+
# Number of rows to return.
|
1045
|
+
#
|
1046
|
+
# @return [Series]
|
1047
|
+
#
|
1048
|
+
# @example
|
1049
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1050
|
+
# s.head(2)
|
1051
|
+
# # =>
|
1052
|
+
# # shape: (2,)
|
1053
|
+
# # Series: 'a' [i64]
|
1054
|
+
# # [
|
1055
|
+
# # 1
|
1056
|
+
# # 2
|
1057
|
+
# # ]
|
301
1058
|
def head(n = 10)
|
302
1059
|
to_frame.select(Utils.col(name).head(n)).to_series
|
303
1060
|
end
|
304
1061
|
|
1062
|
+
# Get the last `n` rows.
|
1063
|
+
#
|
1064
|
+
# @param n [Integer]
|
1065
|
+
# Number of rows to return.
|
1066
|
+
#
|
1067
|
+
# @return [Series]
|
1068
|
+
#
|
1069
|
+
# @example
|
1070
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1071
|
+
# s.tail(2)
|
1072
|
+
# # =>
|
1073
|
+
# # shape: (2,)
|
1074
|
+
# # Series: 'a' [i64]
|
1075
|
+
# # [
|
1076
|
+
# # 2
|
1077
|
+
# # 3
|
1078
|
+
# # ]
|
305
1079
|
def tail(n = 10)
|
306
1080
|
to_frame.select(Utils.col(name).tail(n)).to_series
|
307
1081
|
end
|
308
1082
|
|
309
|
-
#
|
310
|
-
#
|
1083
|
+
# Take every nth value in the Series and return as new Series.
|
1084
|
+
#
|
1085
|
+
# @return [Series]
|
1086
|
+
#
|
1087
|
+
# @example
|
1088
|
+
# s = Polars::Series.new("a", [1, 2, 3, 4])
|
1089
|
+
# s.take_every(2)
|
1090
|
+
# # =>
|
1091
|
+
# # shape: (2,)
|
1092
|
+
# # Series: 'a' [i64]
|
1093
|
+
# # [
|
1094
|
+
# # 1
|
1095
|
+
# # 3
|
1096
|
+
# # ]
|
1097
|
+
def take_every(n)
|
1098
|
+
super
|
1099
|
+
end
|
311
1100
|
|
1101
|
+
# Sort this Series.
|
1102
|
+
#
|
1103
|
+
# @param reverse [Boolean]
|
1104
|
+
# Reverse sort.
|
1105
|
+
# @param in_place [Boolean]
|
1106
|
+
# Sort in place.
|
1107
|
+
#
|
1108
|
+
# @return [Series]
|
1109
|
+
#
|
1110
|
+
# @example
|
1111
|
+
# s = Polars::Series.new("a", [1, 3, 4, 2])
|
1112
|
+
# s.sort
|
1113
|
+
# # =>
|
1114
|
+
# # shape: (4,)
|
1115
|
+
# # Series: 'a' [i64]
|
1116
|
+
# # [
|
1117
|
+
# # 1
|
1118
|
+
# # 2
|
1119
|
+
# # 3
|
1120
|
+
# # 4
|
1121
|
+
# # ]
|
1122
|
+
# s.sort(reverse: true)
|
1123
|
+
# # =>
|
1124
|
+
# # shape: (4,)
|
1125
|
+
# # Series: 'a' [i64]
|
1126
|
+
# # [
|
1127
|
+
# # 4
|
1128
|
+
# # 3
|
1129
|
+
# # 2
|
1130
|
+
# # 1
|
1131
|
+
# # ]
|
312
1132
|
def sort(reverse: false, in_place: false)
|
313
1133
|
if in_place
|
314
1134
|
self._s = _s.sort(reverse)
|
@@ -318,128 +1138,561 @@ module Polars
|
|
318
1138
|
end
|
319
1139
|
end
|
320
1140
|
|
321
|
-
#
|
322
|
-
#
|
1141
|
+
# Return the `k` largest elements.
|
1142
|
+
#
|
1143
|
+
# If `reverse: true`, the smallest elements will be given.
|
1144
|
+
#
|
1145
|
+
# @param k [Integer]
|
1146
|
+
# Number of elements to return.
|
1147
|
+
# @param reverse [Boolean]
|
1148
|
+
# Return the smallest elements.
|
1149
|
+
#
|
1150
|
+
# @return [Series]
|
1151
|
+
def top_k(k: 5, reverse: false)
|
1152
|
+
super
|
1153
|
+
end
|
323
1154
|
|
324
|
-
#
|
325
|
-
#
|
1155
|
+
# Get the index values that would sort this Series.
|
1156
|
+
#
|
1157
|
+
# @param reverse [Boolean]
|
1158
|
+
# Sort in reverse (descending) order.
|
1159
|
+
# @param nulls_last [Boolean]
|
1160
|
+
# Place null values last instead of first.
|
1161
|
+
#
|
1162
|
+
# @return [Series]
|
1163
|
+
#
|
1164
|
+
# @example
|
1165
|
+
# s = Polars::Series.new("a", [5, 3, 4, 1, 2])
|
1166
|
+
# s.arg_sort
|
1167
|
+
# # =>
|
1168
|
+
# # shape: (5,)
|
1169
|
+
# # Series: 'a' [u32]
|
1170
|
+
# # [
|
1171
|
+
# # 3
|
1172
|
+
# # 4
|
1173
|
+
# # 1
|
1174
|
+
# # 2
|
1175
|
+
# # 0
|
1176
|
+
# # ]
|
1177
|
+
def arg_sort(reverse: false, nulls_last: false)
|
1178
|
+
super
|
1179
|
+
end
|
326
1180
|
|
327
|
-
#
|
328
|
-
#
|
1181
|
+
# Get the index values that would sort this Series.
|
1182
|
+
#
|
1183
|
+
# Alias for {#arg_sort}.
|
1184
|
+
#
|
1185
|
+
# @param reverse [Boolean]
|
1186
|
+
# Sort in reverse (descending) order.
|
1187
|
+
# @param nulls_last [Boolean]
|
1188
|
+
# Place null values last instead of first.
|
1189
|
+
#
|
1190
|
+
# @return [Series]
|
1191
|
+
def argsort(reverse: false, nulls_last: false)
|
1192
|
+
super
|
1193
|
+
end
|
329
1194
|
|
330
|
-
#
|
331
|
-
#
|
1195
|
+
# Get unique index as Series.
|
1196
|
+
#
|
1197
|
+
# @return [Series]
|
1198
|
+
#
|
1199
|
+
# @example
|
1200
|
+
# s = Polars::Series.new("a", [1, 2, 2, 3])
|
1201
|
+
# s.arg_unique
|
1202
|
+
# # =>
|
1203
|
+
# # shape: (3,)
|
1204
|
+
# # Series: 'a' [u32]
|
1205
|
+
# # [
|
1206
|
+
# # 0
|
1207
|
+
# # 1
|
1208
|
+
# # 3
|
1209
|
+
# # ]
|
1210
|
+
def arg_unique
|
1211
|
+
super
|
1212
|
+
end
|
332
1213
|
|
1214
|
+
# Get the index of the minimal value.
|
1215
|
+
#
|
1216
|
+
# @return [Integer, nil]
|
1217
|
+
#
|
1218
|
+
# @example
|
1219
|
+
# s = Polars::Series.new("a", [3, 2, 1])
|
1220
|
+
# s.arg_min
|
1221
|
+
# # => 2
|
333
1222
|
def arg_min
|
334
1223
|
_s.arg_min
|
335
1224
|
end
|
336
1225
|
|
1226
|
+
# Get the index of the maximal value.
|
1227
|
+
#
|
1228
|
+
# @return [Integer, nil]
|
1229
|
+
#
|
1230
|
+
# @example
|
1231
|
+
# s = Polars::Series.new("a", [3, 2, 1])
|
1232
|
+
# s.arg_max
|
1233
|
+
# # => 0
|
337
1234
|
def arg_max
|
338
1235
|
_s.arg_max
|
339
1236
|
end
|
340
1237
|
|
341
|
-
#
|
342
|
-
#
|
343
|
-
|
344
|
-
#
|
345
|
-
#
|
346
|
-
|
347
|
-
|
348
|
-
|
1238
|
+
# Find indices where elements should be inserted to maintain order.
|
1239
|
+
#
|
1240
|
+
# @param element [Object]
|
1241
|
+
# Expression or scalar value.
|
1242
|
+
#
|
1243
|
+
# @return [Integer]
|
1244
|
+
def search_sorted(element)
|
1245
|
+
Polars.select(Polars.lit(self).search_sorted(element))[0, 0]
|
1246
|
+
end
|
1247
|
+
|
1248
|
+
# Get unique elements in series.
|
1249
|
+
#
|
1250
|
+
# @param maintain_order [Boolean]
|
1251
|
+
# Maintain order of data. This requires more work.
|
1252
|
+
#
|
1253
|
+
# @return [Series]
|
1254
|
+
#
|
1255
|
+
# @example
|
1256
|
+
# s = Polars::Series.new("a", [1, 2, 2, 3])
|
1257
|
+
# s.unique.sort
|
1258
|
+
# # =>
|
1259
|
+
# # shape: (3,)
|
1260
|
+
# # Series: 'a' [i64]
|
1261
|
+
# # [
|
1262
|
+
# # 1
|
1263
|
+
# # 2
|
1264
|
+
# # 3
|
1265
|
+
# # ]
|
1266
|
+
def unique(maintain_order: false)
|
1267
|
+
super
|
1268
|
+
end
|
349
1269
|
|
1270
|
+
# Take values by index.
|
1271
|
+
#
|
1272
|
+
# @param indices [Array]
|
1273
|
+
# Index location used for selection.
|
1274
|
+
#
|
1275
|
+
# @return [Series]
|
1276
|
+
#
|
1277
|
+
# @example
|
1278
|
+
# s = Polars::Series.new("a", [1, 2, 3, 4])
|
1279
|
+
# s.take([1, 3])
|
1280
|
+
# # =>
|
1281
|
+
# # shape: (2,)
|
1282
|
+
# # Series: 'a' [i64]
|
1283
|
+
# # [
|
1284
|
+
# # 2
|
1285
|
+
# # 4
|
1286
|
+
# # ]
|
1287
|
+
def take(indices)
|
1288
|
+
to_frame.select(Polars.col(name).take(indices)).to_series
|
1289
|
+
end
|
1290
|
+
|
1291
|
+
# Count the null values in this Series.
|
1292
|
+
#
|
1293
|
+
# @return [Integer]
|
350
1294
|
def null_count
|
351
1295
|
_s.null_count
|
352
1296
|
end
|
353
1297
|
|
1298
|
+
# Return `true` if the Series has a validity bitmask.
|
1299
|
+
#
|
1300
|
+
# If there is none, it means that there are no null values.
|
1301
|
+
# Use this to swiftly assert a Series does not have null values.
|
1302
|
+
#
|
1303
|
+
# @return [Boolean]
|
354
1304
|
def has_validity
|
355
1305
|
_s.has_validity
|
356
1306
|
end
|
357
1307
|
|
1308
|
+
# Check if the Series is empty.
|
1309
|
+
#
|
1310
|
+
# @return [Boolean]
|
1311
|
+
#
|
1312
|
+
# @example
|
1313
|
+
# s = Polars::Series.new("a", [])
|
1314
|
+
# s.is_empty
|
1315
|
+
# # => true
|
358
1316
|
def is_empty
|
359
1317
|
len == 0
|
360
1318
|
end
|
361
1319
|
alias_method :empty?, :is_empty
|
362
1320
|
|
363
|
-
#
|
364
|
-
#
|
1321
|
+
# Returns a boolean Series indicating which values are null.
|
1322
|
+
#
|
1323
|
+
# @return [Series]
|
1324
|
+
#
|
1325
|
+
# @example
|
1326
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
|
1327
|
+
# s.is_null
|
1328
|
+
# # =>
|
1329
|
+
# # shape: (4,)
|
1330
|
+
# # Series: 'a' [bool]
|
1331
|
+
# # [
|
1332
|
+
# # false
|
1333
|
+
# # false
|
1334
|
+
# # false
|
1335
|
+
# # true
|
1336
|
+
# # ]
|
1337
|
+
def is_null
|
1338
|
+
super
|
1339
|
+
end
|
365
1340
|
|
366
|
-
#
|
367
|
-
#
|
1341
|
+
# Returns a boolean Series indicating which values are not null.
|
1342
|
+
#
|
1343
|
+
# @return [Series]
|
1344
|
+
#
|
1345
|
+
# @example
|
1346
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
|
1347
|
+
# s.is_not_null
|
1348
|
+
# # =>
|
1349
|
+
# # shape: (4,)
|
1350
|
+
# # Series: 'a' [bool]
|
1351
|
+
# # [
|
1352
|
+
# # true
|
1353
|
+
# # true
|
1354
|
+
# # true
|
1355
|
+
# # false
|
1356
|
+
# # ]
|
1357
|
+
def is_not_null
|
1358
|
+
super
|
1359
|
+
end
|
368
1360
|
|
369
|
-
#
|
370
|
-
#
|
1361
|
+
# Returns a boolean Series indicating which values are finite.
|
1362
|
+
#
|
1363
|
+
# @return [Series]
|
1364
|
+
#
|
1365
|
+
# @example
|
1366
|
+
# s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
|
1367
|
+
# s.is_finite
|
1368
|
+
# # =>
|
1369
|
+
# # shape: (3,)
|
1370
|
+
# # Series: 'a' [bool]
|
1371
|
+
# # [
|
1372
|
+
# # true
|
1373
|
+
# # true
|
1374
|
+
# # false
|
1375
|
+
# # ]
|
1376
|
+
def is_finite
|
1377
|
+
super
|
1378
|
+
end
|
371
1379
|
|
372
|
-
#
|
373
|
-
#
|
1380
|
+
# Returns a boolean Series indicating which values are infinite.
|
1381
|
+
#
|
1382
|
+
# @return [Series]
|
1383
|
+
#
|
1384
|
+
# @example
|
1385
|
+
# s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
|
1386
|
+
# s.is_infinite
|
1387
|
+
# # =>
|
1388
|
+
# # shape: (3,)
|
1389
|
+
# # Series: 'a' [bool]
|
1390
|
+
# # [
|
1391
|
+
# # false
|
1392
|
+
# # false
|
1393
|
+
# # true
|
1394
|
+
# # ]
|
1395
|
+
def is_infinite
|
1396
|
+
super
|
1397
|
+
end
|
374
1398
|
|
375
|
-
#
|
376
|
-
#
|
1399
|
+
# Returns a boolean Series indicating which values are NaN.
|
1400
|
+
#
|
1401
|
+
# @return [Series]
|
1402
|
+
#
|
1403
|
+
# @example
|
1404
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
|
1405
|
+
# s.is_nan
|
1406
|
+
# # =>
|
1407
|
+
# # shape: (4,)
|
1408
|
+
# # Series: 'a' [bool]
|
1409
|
+
# # [
|
1410
|
+
# # false
|
1411
|
+
# # false
|
1412
|
+
# # false
|
1413
|
+
# # true
|
1414
|
+
# # ]
|
1415
|
+
def is_nan
|
1416
|
+
super
|
1417
|
+
end
|
377
1418
|
|
378
|
-
#
|
379
|
-
#
|
1419
|
+
# Returns a boolean Series indicating which values are not NaN.
|
1420
|
+
#
|
1421
|
+
# @return [Series]
|
1422
|
+
#
|
1423
|
+
# @example
|
1424
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
|
1425
|
+
# s.is_not_nan
|
1426
|
+
# # =>
|
1427
|
+
# # shape: (4,)
|
1428
|
+
# # Series: 'a' [bool]
|
1429
|
+
# # [
|
1430
|
+
# # true
|
1431
|
+
# # true
|
1432
|
+
# # true
|
1433
|
+
# # false
|
1434
|
+
# # ]
|
1435
|
+
def is_not_nan
|
1436
|
+
super
|
1437
|
+
end
|
380
1438
|
|
381
1439
|
# def is_in
|
382
1440
|
# end
|
383
1441
|
|
384
|
-
#
|
385
|
-
#
|
386
|
-
|
387
|
-
#
|
388
|
-
#
|
1442
|
+
# Get index values where Boolean Series evaluate `true`.
|
1443
|
+
#
|
1444
|
+
# @return [Series]
|
1445
|
+
#
|
1446
|
+
# @example
|
1447
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1448
|
+
# (s == 2).arg_true
|
1449
|
+
# # =>
|
1450
|
+
# # shape: (1,)
|
1451
|
+
# # Series: 'a' [u32]
|
1452
|
+
# # [
|
1453
|
+
# # 1
|
1454
|
+
# # ]
|
1455
|
+
def arg_true
|
1456
|
+
Polars.arg_where(self, eager: true)
|
1457
|
+
end
|
1458
|
+
|
1459
|
+
# Get mask of all unique values.
|
1460
|
+
#
|
1461
|
+
# @return [Series]
|
1462
|
+
#
|
1463
|
+
# @example
|
1464
|
+
# s = Polars::Series.new("a", [1, 2, 2, 3])
|
1465
|
+
# s.is_unique
|
1466
|
+
# # =>
|
1467
|
+
# # shape: (4,)
|
1468
|
+
# # Series: 'a' [bool]
|
1469
|
+
# # [
|
1470
|
+
# # true
|
1471
|
+
# # false
|
1472
|
+
# # false
|
1473
|
+
# # true
|
1474
|
+
# # ]
|
1475
|
+
def is_unique
|
1476
|
+
super
|
1477
|
+
end
|
389
1478
|
|
390
|
-
#
|
391
|
-
#
|
1479
|
+
# Get a mask of the first unique value.
|
1480
|
+
#
|
1481
|
+
# @return [Series]
|
1482
|
+
def is_first
|
1483
|
+
super
|
1484
|
+
end
|
392
1485
|
|
393
|
-
#
|
394
|
-
#
|
1486
|
+
# Get mask of all duplicated values.
|
1487
|
+
#
|
1488
|
+
# @return [Series]
|
1489
|
+
#
|
1490
|
+
# @example
|
1491
|
+
# s = Polars::Series.new("a", [1, 2, 2, 3])
|
1492
|
+
# s.is_duplicated
|
1493
|
+
# # =>
|
1494
|
+
# # shape: (4,)
|
1495
|
+
# # Series: 'a' [bool]
|
1496
|
+
# # [
|
1497
|
+
# # false
|
1498
|
+
# # true
|
1499
|
+
# # true
|
1500
|
+
# # false
|
1501
|
+
# # ]
|
1502
|
+
def is_duplicated
|
1503
|
+
super
|
1504
|
+
end
|
395
1505
|
|
396
|
-
#
|
397
|
-
#
|
1506
|
+
# Explode a list or utf8 Series.
|
1507
|
+
#
|
1508
|
+
# This means that every item is expanded to a new row.
|
1509
|
+
#
|
1510
|
+
# @return [Series]
|
1511
|
+
#
|
1512
|
+
# @example
|
1513
|
+
# s = Polars::Series.new("a", [[1, 2], [3, 4], [9, 10]])
|
1514
|
+
# s.explode
|
1515
|
+
# # =>
|
1516
|
+
# # shape: (6,)
|
1517
|
+
# # Series: 'a' [i64]
|
1518
|
+
# # [
|
1519
|
+
# # 1
|
1520
|
+
# # 2
|
1521
|
+
# # 3
|
1522
|
+
# # 4
|
1523
|
+
# # 9
|
1524
|
+
# # 10
|
1525
|
+
# # ]
|
1526
|
+
def explode
|
1527
|
+
super
|
1528
|
+
end
|
398
1529
|
|
1530
|
+
# Check if series is equal with another Series.
|
1531
|
+
#
|
1532
|
+
# @param other [Series]
|
1533
|
+
# Series to compare with.
|
1534
|
+
# @param null_equal [Boolean]
|
1535
|
+
# Consider null values as equal.
|
1536
|
+
# @param strict [Boolean]
|
1537
|
+
# Don't allow different numerical dtypes, e.g. comparing `:u32` with a
|
1538
|
+
# `:i64` will return `false`.
|
1539
|
+
#
|
1540
|
+
# @return [Boolean]
|
1541
|
+
#
|
1542
|
+
# @example
|
1543
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1544
|
+
# s2 = Polars::Series.new("b", [4, 5, 6])
|
1545
|
+
# s.series_equal(s)
|
1546
|
+
# # => true
|
1547
|
+
# s.series_equal(s2)
|
1548
|
+
# # => false
|
399
1549
|
def series_equal(other, null_equal: false, strict: false)
|
400
1550
|
_s.series_equal(other._s, null_equal, strict)
|
401
1551
|
end
|
402
1552
|
|
1553
|
+
# Length of this Series.
|
1554
|
+
#
|
1555
|
+
# @return [Integer]
|
1556
|
+
#
|
1557
|
+
# @example
|
1558
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1559
|
+
# s.len
|
1560
|
+
# # => 3
|
403
1561
|
def len
|
404
1562
|
_s.len
|
405
1563
|
end
|
406
|
-
|
407
|
-
|
408
|
-
#
|
1564
|
+
alias_method :length, :len
|
1565
|
+
|
1566
|
+
# Cast between data types.
|
1567
|
+
#
|
1568
|
+
# @param dtype [Symbol]
|
1569
|
+
# DataType to cast to
|
1570
|
+
# @param strict [Boolean]
|
1571
|
+
# Throw an error if a cast could not be done, for instance due to an overflow.
|
1572
|
+
#
|
1573
|
+
# @return [Series]
|
1574
|
+
#
|
1575
|
+
# @example
|
1576
|
+
# s = Polars::Series.new("a", [true, false, true])
|
1577
|
+
# s.cast(:u32)
|
1578
|
+
# # =>
|
1579
|
+
# # shape: (3,)
|
1580
|
+
# # Series: 'a' [u32]
|
1581
|
+
# # [
|
1582
|
+
# # 1
|
1583
|
+
# # 0
|
1584
|
+
# # 1
|
1585
|
+
# # ]
|
1586
|
+
def cast(dtype, strict: true)
|
1587
|
+
super
|
1588
|
+
end
|
409
1589
|
|
410
1590
|
# def to_physical
|
411
1591
|
# end
|
412
1592
|
|
1593
|
+
# Convert this Series to a Ruby Array. This operation clones data.
|
1594
|
+
#
|
1595
|
+
# @return [Array]
|
1596
|
+
#
|
1597
|
+
# @example
|
1598
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1599
|
+
# s.to_a
|
1600
|
+
# # => [1, 2, 3]
|
413
1601
|
def to_a
|
414
1602
|
_s.to_a
|
415
1603
|
end
|
416
1604
|
|
1605
|
+
# Create a single chunk of memory for this Series.
|
1606
|
+
#
|
1607
|
+
# @param in_place [Boolean]
|
1608
|
+
# In place or not.
|
1609
|
+
#
|
1610
|
+
# @return [Series]
|
417
1611
|
def rechunk(in_place: false)
|
418
1612
|
opt_s = _s.rechunk(in_place)
|
419
1613
|
in_place ? self : Utils.wrap_s(opt_s)
|
420
1614
|
end
|
421
1615
|
|
422
|
-
#
|
423
|
-
#
|
1616
|
+
# Return Series in reverse order.
|
1617
|
+
#
|
1618
|
+
# @return [Series]
|
1619
|
+
#
|
1620
|
+
# @example
|
1621
|
+
# s = Polars::Series.new("a", [1, 2, 3], dtype: :i8)
|
1622
|
+
# s.reverse
|
1623
|
+
# # =>
|
1624
|
+
# # shape: (3,)
|
1625
|
+
# # Series: 'a' [i8]
|
1626
|
+
# # [
|
1627
|
+
# # 3
|
1628
|
+
# # 2
|
1629
|
+
# # 1
|
1630
|
+
# # ]
|
1631
|
+
def reverse
|
1632
|
+
super
|
1633
|
+
end
|
424
1634
|
|
1635
|
+
# Check if this Series datatype is numeric.
|
1636
|
+
#
|
1637
|
+
# @return [Boolean]
|
1638
|
+
#
|
1639
|
+
# @example
|
1640
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1641
|
+
# s.is_numeric
|
1642
|
+
# # => true
|
425
1643
|
def is_numeric
|
426
1644
|
[:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype)
|
427
1645
|
end
|
428
1646
|
alias_method :numeric?, :is_numeric
|
429
1647
|
|
430
|
-
#
|
431
|
-
#
|
432
|
-
|
1648
|
+
# Check if this Series datatype is datelike.
|
1649
|
+
#
|
1650
|
+
# @return [Boolean]
|
1651
|
+
#
|
1652
|
+
# @example
|
1653
|
+
# s = Polars::Series.new([Date.new(2021, 1, 1), Date.new(2021, 1, 2), Date.new(2021, 1, 3)])
|
1654
|
+
# s.is_datelike
|
1655
|
+
# # => true
|
1656
|
+
def is_datelike
|
1657
|
+
[:date, :datetime, :duration, :time].include?(dtype)
|
1658
|
+
end
|
1659
|
+
|
1660
|
+
# Check if this Series has floating point numbers.
|
1661
|
+
#
|
1662
|
+
# @return [Boolean]
|
1663
|
+
#
|
1664
|
+
# @example
|
1665
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0])
|
1666
|
+
# s.is_float
|
1667
|
+
# # => true
|
433
1668
|
def is_float
|
434
1669
|
[:f32, :f64].include?(dtype)
|
435
1670
|
end
|
436
1671
|
alias_method :float?, :is_float
|
437
1672
|
|
438
|
-
|
1673
|
+
# Check if this Series is a Boolean.
|
1674
|
+
#
|
1675
|
+
# @return [Boolean]
|
1676
|
+
#
|
1677
|
+
# @example
|
1678
|
+
# s = Polars::Series.new("a", [true, false, true])
|
1679
|
+
# s.is_boolean
|
1680
|
+
# # => true
|
1681
|
+
def is_boolean
|
439
1682
|
dtype == :bool
|
440
1683
|
end
|
441
|
-
alias_method :
|
442
|
-
|
1684
|
+
alias_method :boolean?, :is_boolean
|
1685
|
+
alias_method :is_bool, :is_boolean
|
1686
|
+
alias_method :bool?, :is_boolean
|
1687
|
+
|
1688
|
+
# Check if this Series datatype is a Utf8.
|
1689
|
+
#
|
1690
|
+
# @return [Boolean]
|
1691
|
+
#
|
1692
|
+
# @example
|
1693
|
+
# s = Polars::Series.new("x", ["a", "b", "c"])
|
1694
|
+
# s.is_utf8
|
1695
|
+
# # => true
|
443
1696
|
def is_utf8
|
444
1697
|
dtype == :str
|
445
1698
|
end
|
@@ -454,89 +1707,575 @@ module Polars
|
|
454
1707
|
# def set
|
455
1708
|
# end
|
456
1709
|
|
457
|
-
#
|
458
|
-
#
|
1710
|
+
# Set values at the index locations.
|
1711
|
+
#
|
1712
|
+
# @param idx [Object]
|
1713
|
+
# Integers representing the index locations.
|
1714
|
+
# @param value [Object]
|
1715
|
+
# Replacement values.
|
1716
|
+
#
|
1717
|
+
# @return [Series]
|
1718
|
+
#
|
1719
|
+
# @example
|
1720
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1721
|
+
# s.set_at_idx(1, 10)
|
1722
|
+
# # =>
|
1723
|
+
# # shape: (3,)
|
1724
|
+
# # Series: 'a' [i64]
|
1725
|
+
# # [
|
1726
|
+
# # 1
|
1727
|
+
# # 10
|
1728
|
+
# # 3
|
1729
|
+
# # ]
|
1730
|
+
def set_at_idx(idx, value)
|
1731
|
+
if idx.is_a?(Integer)
|
1732
|
+
idx = [idx]
|
1733
|
+
end
|
1734
|
+
if idx.length == 0
|
1735
|
+
return self
|
1736
|
+
end
|
459
1737
|
|
460
|
-
|
461
|
-
|
1738
|
+
idx = Series.new("", idx)
|
1739
|
+
if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(String) || value.nil?
|
1740
|
+
value = Series.new("", [value])
|
1741
|
+
|
1742
|
+
# if we need to set more than a single value, we extend it
|
1743
|
+
if idx.length > 0
|
1744
|
+
value = value.extend_constant(value[0], idx.length - 1)
|
1745
|
+
end
|
1746
|
+
elsif !value.is_a?(Series)
|
1747
|
+
value = Series.new("", value)
|
1748
|
+
end
|
1749
|
+
_s.set_at_idx(idx._s, value._s)
|
1750
|
+
self
|
1751
|
+
end
|
1752
|
+
|
1753
|
+
# Create an empty copy of the current Series.
|
1754
|
+
#
|
1755
|
+
# The copy has identical name/dtype but no data.
|
1756
|
+
#
|
1757
|
+
# @return [Series]
|
1758
|
+
#
|
1759
|
+
# @example
|
1760
|
+
# s = Polars::Series.new("a", [nil, true, false])
|
1761
|
+
# s.cleared
|
1762
|
+
# # =>
|
1763
|
+
# # shape: (0,)
|
1764
|
+
# # Series: 'a' [bool]
|
1765
|
+
# # [
|
1766
|
+
# # ]
|
1767
|
+
def cleared
|
1768
|
+
len > 0 ? limit(0) : clone
|
1769
|
+
end
|
462
1770
|
|
463
1771
|
# clone handled by initialize_copy
|
464
1772
|
|
465
|
-
#
|
466
|
-
#
|
1773
|
+
# Fill floating point NaN value with a fill value.
|
1774
|
+
#
|
1775
|
+
# @param fill_value [Object]
|
1776
|
+
# Value used to fill nan values.
|
1777
|
+
#
|
1778
|
+
# @return [Series]
|
1779
|
+
#
|
1780
|
+
# @example
|
1781
|
+
# s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
|
1782
|
+
# s.fill_nan(0)
|
1783
|
+
# # =>
|
1784
|
+
# # shape: (4,)
|
1785
|
+
# # Series: 'a' [f64]
|
1786
|
+
# # [
|
1787
|
+
# # 1.0
|
1788
|
+
# # 2.0
|
1789
|
+
# # 3.0
|
1790
|
+
# # 0.0
|
1791
|
+
# # ]
|
1792
|
+
def fill_nan(fill_value)
|
1793
|
+
super
|
1794
|
+
end
|
467
1795
|
|
468
|
-
#
|
469
|
-
#
|
1796
|
+
# Fill null values using the specified value or strategy.
|
1797
|
+
#
|
1798
|
+
# @param value [Object]
|
1799
|
+
# Value used to fill null values.
|
1800
|
+
# @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
|
1801
|
+
# Strategy used to fill null values.
|
1802
|
+
# @param limit [Integer, nil]
|
1803
|
+
# Number of consecutive null values to fill when using the "forward" or
|
1804
|
+
# "backward" strategy.
|
1805
|
+
#
|
1806
|
+
# @return [Series]
|
1807
|
+
#
|
1808
|
+
# @example
|
1809
|
+
# s = Polars::Series.new("a", [1, 2, 3, nil])
|
1810
|
+
# s.fill_null(strategy: "forward")
|
1811
|
+
# # =>
|
1812
|
+
# # shape: (4,)
|
1813
|
+
# # Series: 'a' [i64]
|
1814
|
+
# # [
|
1815
|
+
# # 1
|
1816
|
+
# # 2
|
1817
|
+
# # 3
|
1818
|
+
# # 3
|
1819
|
+
# # ]
|
1820
|
+
#
|
1821
|
+
# @example
|
1822
|
+
# s.fill_null(strategy: "min")
|
1823
|
+
# # =>
|
1824
|
+
# # shape: (4,)
|
1825
|
+
# # Series: 'a' [i64]
|
1826
|
+
# # [
|
1827
|
+
# # 1
|
1828
|
+
# # 2
|
1829
|
+
# # 3
|
1830
|
+
# # 1
|
1831
|
+
# # ]
|
1832
|
+
#
|
1833
|
+
# @example
|
1834
|
+
# s = Polars::Series.new("b", ["x", nil, "z"])
|
1835
|
+
# s.fill_null(Polars.lit(""))
|
1836
|
+
# # =>
|
1837
|
+
# # shape: (3,)
|
1838
|
+
# # Series: 'b' [str]
|
1839
|
+
# # [
|
1840
|
+
# # "x"
|
1841
|
+
# # ""
|
1842
|
+
# # "z"
|
1843
|
+
# # ]
|
1844
|
+
def fill_null(value = nil, strategy: nil, limit: nil)
|
1845
|
+
super
|
1846
|
+
end
|
470
1847
|
|
1848
|
+
# Rounds down to the nearest integer value.
|
1849
|
+
#
|
1850
|
+
# Only works on floating point Series.
|
1851
|
+
#
|
1852
|
+
# @return [Series]
|
1853
|
+
#
|
1854
|
+
# @example
|
1855
|
+
# s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
|
1856
|
+
# s.floor
|
1857
|
+
# # =>
|
1858
|
+
# # shape: (3,)
|
1859
|
+
# # Series: 'a' [f64]
|
1860
|
+
# # [
|
1861
|
+
# # 1.0
|
1862
|
+
# # 2.0
|
1863
|
+
# # 3.0
|
1864
|
+
# # ]
|
471
1865
|
def floor
|
472
1866
|
Utils.wrap_s(_s.floor)
|
473
1867
|
end
|
474
1868
|
|
1869
|
+
# Rounds up to the nearest integer value.
|
1870
|
+
#
|
1871
|
+
# Only works on floating point Series.
|
1872
|
+
#
|
1873
|
+
# @return [Series]
|
1874
|
+
#
|
1875
|
+
# @example
|
1876
|
+
# s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
|
1877
|
+
# s.ceil
|
1878
|
+
# # =>
|
1879
|
+
# # shape: (3,)
|
1880
|
+
# # Series: 'a' [f64]
|
1881
|
+
# # [
|
1882
|
+
# # 2.0
|
1883
|
+
# # 3.0
|
1884
|
+
# # 4.0
|
1885
|
+
# # ]
|
475
1886
|
def ceil
|
476
|
-
|
1887
|
+
super
|
477
1888
|
end
|
478
1889
|
|
479
|
-
#
|
1890
|
+
# Round underlying floating point data by `decimals` digits.
|
1891
|
+
#
|
1892
|
+
# @param decimals [Integer]
|
1893
|
+
# number of decimals to round by.
|
1894
|
+
#
|
1895
|
+
# @return [Series]
|
1896
|
+
#
|
1897
|
+
# @example
|
1898
|
+
# s = Polars::Series.new("a", [1.12345, 2.56789, 3.901234])
|
1899
|
+
# s.round(2)
|
1900
|
+
# # =>
|
1901
|
+
# # shape: (3,)
|
1902
|
+
# # Series: 'a' [f64]
|
1903
|
+
# # [
|
1904
|
+
# # 1.12
|
1905
|
+
# # 2.57
|
1906
|
+
# # 3.9
|
1907
|
+
# # ]
|
480
1908
|
def round(decimals = 0)
|
481
|
-
|
1909
|
+
super
|
482
1910
|
end
|
483
1911
|
|
484
1912
|
# def dot
|
485
1913
|
# end
|
486
1914
|
|
487
|
-
#
|
488
|
-
#
|
1915
|
+
# Compute the most occurring value(s).
|
1916
|
+
#
|
1917
|
+
# Can return multiple values.
|
1918
|
+
#
|
1919
|
+
# @return [Series]
|
1920
|
+
#
|
1921
|
+
# @example
|
1922
|
+
# s = Polars::Series.new("a", [1, 2, 2, 3])
|
1923
|
+
# s.mode
|
1924
|
+
# # =>
|
1925
|
+
# # shape: (1,)
|
1926
|
+
# # Series: 'a' [i64]
|
1927
|
+
# # [
|
1928
|
+
# # 2
|
1929
|
+
# # ]
|
1930
|
+
def mode
|
1931
|
+
super
|
1932
|
+
end
|
489
1933
|
|
490
|
-
#
|
491
|
-
#
|
1934
|
+
# Compute the element-wise indication of the sign.
|
1935
|
+
#
|
1936
|
+
# @return [Series]
|
1937
|
+
#
|
1938
|
+
# @example
|
1939
|
+
# s = Polars::Series.new("a", [-9.0, -0.0, 0.0, 4.0, nil])
|
1940
|
+
# s.sign
|
1941
|
+
# # =>
|
1942
|
+
# # shape: (5,)
|
1943
|
+
# # Series: 'a' [i64]
|
1944
|
+
# # [
|
1945
|
+
# # -1
|
1946
|
+
# # 0
|
1947
|
+
# # 0
|
1948
|
+
# # 1
|
1949
|
+
# # null
|
1950
|
+
# # ]
|
1951
|
+
def sign
|
1952
|
+
super
|
1953
|
+
end
|
492
1954
|
|
493
|
-
#
|
494
|
-
#
|
1955
|
+
# Compute the element-wise value for the sine.
|
1956
|
+
#
|
1957
|
+
# @return [Series]
|
1958
|
+
#
|
1959
|
+
# @example
|
1960
|
+
# s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
|
1961
|
+
# s.sin
|
1962
|
+
# # =>
|
1963
|
+
# # shape: (3,)
|
1964
|
+
# # Series: 'a' [f64]
|
1965
|
+
# # [
|
1966
|
+
# # 0.0
|
1967
|
+
# # 1.0
|
1968
|
+
# # 1.2246e-16
|
1969
|
+
# # ]
|
1970
|
+
def sin
|
1971
|
+
super
|
1972
|
+
end
|
495
1973
|
|
496
|
-
#
|
497
|
-
#
|
1974
|
+
# Compute the element-wise value for the cosine.
|
1975
|
+
#
|
1976
|
+
# @return [Series]
|
1977
|
+
#
|
1978
|
+
# @example
|
1979
|
+
# s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
|
1980
|
+
# s.cos
|
1981
|
+
# # =>
|
1982
|
+
# # shape: (3,)
|
1983
|
+
# # Series: 'a' [f64]
|
1984
|
+
# # [
|
1985
|
+
# # 1.0
|
1986
|
+
# # 6.1232e-17
|
1987
|
+
# # -1.0
|
1988
|
+
# # ]
|
1989
|
+
def cos
|
1990
|
+
super
|
1991
|
+
end
|
498
1992
|
|
499
|
-
#
|
500
|
-
#
|
1993
|
+
# Compute the element-wise value for the tangent.
|
1994
|
+
#
|
1995
|
+
# @return [Series]
|
1996
|
+
#
|
1997
|
+
# @example
|
1998
|
+
# s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
|
1999
|
+
# s.tan
|
2000
|
+
# # =>
|
2001
|
+
# # shape: (3,)
|
2002
|
+
# # Series: 'a' [f64]
|
2003
|
+
# # [
|
2004
|
+
# # 0.0
|
2005
|
+
# # 1.6331e16
|
2006
|
+
# # -1.2246e-16
|
2007
|
+
# # ]
|
2008
|
+
def tan
|
2009
|
+
super
|
2010
|
+
end
|
501
2011
|
|
502
|
-
#
|
503
|
-
#
|
2012
|
+
# Compute the element-wise value for the inverse sine.
|
2013
|
+
#
|
2014
|
+
# @return [Series]
|
2015
|
+
#
|
2016
|
+
# @example
|
2017
|
+
# s = Polars::Series.new("a", [1.0, 0.0, -1.0])
|
2018
|
+
# s.arcsin
|
2019
|
+
# # =>
|
2020
|
+
# # shape: (3,)
|
2021
|
+
# # Series: 'a' [f64]
|
2022
|
+
# # [
|
2023
|
+
# # 1.570796
|
2024
|
+
# # 0.0
|
2025
|
+
# # -1.570796
|
2026
|
+
# # ]
|
2027
|
+
def arcsin
|
2028
|
+
super
|
2029
|
+
end
|
504
2030
|
|
505
|
-
#
|
506
|
-
#
|
2031
|
+
# Compute the element-wise value for the inverse cosine.
|
2032
|
+
#
|
2033
|
+
# @return [Series]
|
2034
|
+
#
|
2035
|
+
# @example
|
2036
|
+
# s = Polars::Series.new("a", [1.0, 0.0, -1.0])
|
2037
|
+
# s.arccos
|
2038
|
+
# # =>
|
2039
|
+
# # shape: (3,)
|
2040
|
+
# # Series: 'a' [f64]
|
2041
|
+
# # [
|
2042
|
+
# # 0.0
|
2043
|
+
# # 1.570796
|
2044
|
+
# # 3.141593
|
2045
|
+
# # ]
|
2046
|
+
def arccos
|
2047
|
+
super
|
2048
|
+
end
|
507
2049
|
|
508
|
-
#
|
509
|
-
#
|
2050
|
+
# Compute the element-wise value for the inverse tangent.
|
2051
|
+
#
|
2052
|
+
# @return [Series]
|
2053
|
+
#
|
2054
|
+
# @example
|
2055
|
+
# s = Polars::Series.new("a", [1.0, 0.0, -1.0])
|
2056
|
+
# s.arctan
|
2057
|
+
# # =>
|
2058
|
+
# # shape: (3,)
|
2059
|
+
# # Series: 'a' [f64]
|
2060
|
+
# # [
|
2061
|
+
# # 0.785398
|
2062
|
+
# # 0.0
|
2063
|
+
# # -0.785398
|
2064
|
+
# # ]
|
2065
|
+
def arctan
|
2066
|
+
super
|
2067
|
+
end
|
510
2068
|
|
511
|
-
#
|
512
|
-
#
|
2069
|
+
# Compute the element-wise value for the inverse hyperbolic sine.
|
2070
|
+
#
|
2071
|
+
# @return [Series]
|
2072
|
+
#
|
2073
|
+
# @example
|
2074
|
+
# s = Polars::Series.new("a", [1.0, 0.0, -1.0])
|
2075
|
+
# s.arcsinh
|
2076
|
+
# # =>
|
2077
|
+
# # shape: (3,)
|
2078
|
+
# # Series: 'a' [f64]
|
2079
|
+
# # [
|
2080
|
+
# # 0.881374
|
2081
|
+
# # 0.0
|
2082
|
+
# # -0.881374
|
2083
|
+
# # ]
|
2084
|
+
def arcsinh
|
2085
|
+
super
|
2086
|
+
end
|
513
2087
|
|
514
|
-
#
|
515
|
-
#
|
2088
|
+
# Compute the element-wise value for the inverse hyperbolic cosine.
|
2089
|
+
#
|
2090
|
+
# @return [Series]
|
2091
|
+
#
|
2092
|
+
# @example
|
2093
|
+
# s = Polars::Series.new("a", [5.0, 1.0, 0.0, -1.0])
|
2094
|
+
# s.arccosh
|
2095
|
+
# # =>
|
2096
|
+
# # shape: (4,)
|
2097
|
+
# # Series: 'a' [f64]
|
2098
|
+
# # [
|
2099
|
+
# # 2.292432
|
2100
|
+
# # 0.0
|
2101
|
+
# # NaN
|
2102
|
+
# # NaN
|
2103
|
+
# # ]
|
2104
|
+
def arccosh
|
2105
|
+
super
|
2106
|
+
end
|
516
2107
|
|
517
|
-
#
|
518
|
-
#
|
2108
|
+
# Compute the element-wise value for the inverse hyperbolic tangent.
|
2109
|
+
#
|
2110
|
+
# @return [Series]
|
2111
|
+
#
|
2112
|
+
# @example
|
2113
|
+
# s = Polars::Series.new("a", [2.0, 1.0, 0.5, 0.0, -0.5, -1.0, -1.1])
|
2114
|
+
# s.arctanh
|
2115
|
+
# # =>
|
2116
|
+
# # shape: (7,)
|
2117
|
+
# # Series: 'a' [f64]
|
2118
|
+
# # [
|
2119
|
+
# # NaN
|
2120
|
+
# # inf
|
2121
|
+
# # 0.549306
|
2122
|
+
# # 0.0
|
2123
|
+
# # -0.549306
|
2124
|
+
# # -inf
|
2125
|
+
# # NaN
|
2126
|
+
# # ]
|
2127
|
+
def arctanh
|
2128
|
+
super
|
2129
|
+
end
|
519
2130
|
|
520
|
-
#
|
521
|
-
#
|
2131
|
+
# Compute the element-wise value for the hyperbolic sine.
|
2132
|
+
#
|
2133
|
+
# @return [Series]
|
2134
|
+
#
|
2135
|
+
# @example
|
2136
|
+
# s = Polars::Series.new("a", [1.0, 0.0, -1.0])
|
2137
|
+
# s.sinh
|
2138
|
+
# # =>
|
2139
|
+
# # shape: (3,)
|
2140
|
+
# # Series: 'a' [f64]
|
2141
|
+
# # [
|
2142
|
+
# # 1.175201
|
2143
|
+
# # 0.0
|
2144
|
+
# # -1.175201
|
2145
|
+
# # ]
|
2146
|
+
def sinh
|
2147
|
+
super
|
2148
|
+
end
|
522
2149
|
|
523
|
-
#
|
524
|
-
#
|
2150
|
+
# Compute the element-wise value for the hyperbolic cosine.
|
2151
|
+
#
|
2152
|
+
# @return [Series]
|
2153
|
+
#
|
2154
|
+
# @example
|
2155
|
+
# s = Polars::Series.new("a", [1.0, 0.0, -1.0])
|
2156
|
+
# s.cosh
|
2157
|
+
# # =>
|
2158
|
+
# # shape: (3,)
|
2159
|
+
# # Series: 'a' [f64]
|
2160
|
+
# # [
|
2161
|
+
# # 1.543081
|
2162
|
+
# # 1.0
|
2163
|
+
# # 1.543081
|
2164
|
+
# # ]
|
2165
|
+
def cosh
|
2166
|
+
super
|
2167
|
+
end
|
525
2168
|
|
526
|
-
#
|
527
|
-
#
|
2169
|
+
# Compute the element-wise value for the hyperbolic tangent.
|
2170
|
+
#
|
2171
|
+
# @return [Series]
|
2172
|
+
#
|
2173
|
+
# @example
|
2174
|
+
# s = Polars::Series.new("a", [1.0, 0.0, -1.0])
|
2175
|
+
# s.tanh
|
2176
|
+
# # =>
|
2177
|
+
# # shape: (3,)
|
2178
|
+
# # Series: 'a' [f64]
|
2179
|
+
# # [
|
2180
|
+
# # 0.761594
|
2181
|
+
# # 0.0
|
2182
|
+
# # -0.761594
|
2183
|
+
# # ]
|
2184
|
+
def tanh
|
2185
|
+
super
|
2186
|
+
end
|
528
2187
|
|
529
2188
|
# def apply
|
530
2189
|
# end
|
531
2190
|
|
532
|
-
#
|
533
|
-
#
|
2191
|
+
# Shift the values by a given period.
|
2192
|
+
#
|
2193
|
+
# @param periods [Integer]
|
2194
|
+
# Number of places to shift (may be negative).
|
2195
|
+
#
|
2196
|
+
# @return [Series]
|
2197
|
+
#
|
2198
|
+
# @example
|
2199
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2200
|
+
# s.shift(1)
|
2201
|
+
# # =>
|
2202
|
+
# # shape: (3,)
|
2203
|
+
# # Series: 'a' [i64]
|
2204
|
+
# # [
|
2205
|
+
# # null
|
2206
|
+
# # 1
|
2207
|
+
# # 2
|
2208
|
+
# # ]
|
2209
|
+
#
|
2210
|
+
# @example
|
2211
|
+
# s.shift(-1)
|
2212
|
+
# # =>
|
2213
|
+
# # shape: (3,)
|
2214
|
+
# # Series: 'a' [i64]
|
2215
|
+
# # [
|
2216
|
+
# # 2
|
2217
|
+
# # 3
|
2218
|
+
# # null
|
2219
|
+
# # ]
|
2220
|
+
def shift(periods = 1)
|
2221
|
+
super
|
2222
|
+
end
|
534
2223
|
|
535
|
-
#
|
536
|
-
#
|
2224
|
+
# Shift the values by a given period and fill the resulting null values.
|
2225
|
+
#
|
2226
|
+
# @param periods [Integer]
|
2227
|
+
# Number of places to shift (may be negative).
|
2228
|
+
# @param fill_value [Object]
|
2229
|
+
# Fill the resulting null values with this value.
|
2230
|
+
#
|
2231
|
+
# @return [Series]
|
2232
|
+
def shift_and_fill(periods, fill_value)
|
2233
|
+
super
|
2234
|
+
end
|
537
2235
|
|
538
|
-
#
|
539
|
-
#
|
2236
|
+
# Take values from self or other based on the given mask.
|
2237
|
+
#
|
2238
|
+
# Where mask evaluates true, take values from self. Where mask evaluates false,
|
2239
|
+
# take values from other.
|
2240
|
+
#
|
2241
|
+
# @param mask [Series]
|
2242
|
+
# Boolean Series.
|
2243
|
+
# @param other [Series]
|
2244
|
+
# Series of same type.
|
2245
|
+
#
|
2246
|
+
# @return [Series]
|
2247
|
+
#
|
2248
|
+
# @example
|
2249
|
+
# s1 = Polars::Series.new([1, 2, 3, 4, 5])
|
2250
|
+
# s2 = Polars::Series.new([5, 4, 3, 2, 1])
|
2251
|
+
# s1.zip_with(s1 < s2, s2)
|
2252
|
+
# # =>
|
2253
|
+
# # shape: (5,)
|
2254
|
+
# # Series: '' [i64]
|
2255
|
+
# # [
|
2256
|
+
# # 1
|
2257
|
+
# # 2
|
2258
|
+
# # 3
|
2259
|
+
# # 2
|
2260
|
+
# # 1
|
2261
|
+
# # ]
|
2262
|
+
#
|
2263
|
+
# @example
|
2264
|
+
# mask = Polars::Series.new([true, false, true, false, true])
|
2265
|
+
# s1.zip_with(mask, s2)
|
2266
|
+
# # =>
|
2267
|
+
# # shape: (5,)
|
2268
|
+
# # Series: '' [i64]
|
2269
|
+
# # [
|
2270
|
+
# # 1
|
2271
|
+
# # 4
|
2272
|
+
# # 3
|
2273
|
+
# # 2
|
2274
|
+
# # 5
|
2275
|
+
# # ]
|
2276
|
+
def zip_with(mask, other)
|
2277
|
+
Utils.wrap_s(_s.zip_with(mask._s, other._s))
|
2278
|
+
end
|
540
2279
|
|
541
2280
|
# def rolling_min
|
542
2281
|
# end
|
@@ -571,62 +2310,266 @@ module Polars
|
|
571
2310
|
# def sample
|
572
2311
|
# end
|
573
2312
|
|
2313
|
+
# Get a boolean mask of the local maximum peaks.
|
2314
|
+
#
|
2315
|
+
# @return [Series]
|
2316
|
+
#
|
2317
|
+
# @example
|
2318
|
+
# s = Polars::Series.new("a", [1, 2, 3, 4, 5])
|
2319
|
+
# s.peak_max
|
2320
|
+
# # =>
|
2321
|
+
# # shape: (5,)
|
2322
|
+
# # Series: '' [bool]
|
2323
|
+
# # [
|
2324
|
+
# # false
|
2325
|
+
# # false
|
2326
|
+
# # false
|
2327
|
+
# # false
|
2328
|
+
# # true
|
2329
|
+
# # ]
|
574
2330
|
def peak_max
|
575
2331
|
Utils.wrap_s(_s.peak_max)
|
576
2332
|
end
|
577
2333
|
|
2334
|
+
# Get a boolean mask of the local minimum peaks.
|
2335
|
+
#
|
2336
|
+
# @return [Series]
|
2337
|
+
#
|
2338
|
+
# @example
|
2339
|
+
# s = Polars::Series.new("a", [4, 1, 3, 2, 5])
|
2340
|
+
# s.peak_min
|
2341
|
+
# # =>
|
2342
|
+
# # shape: (5,)
|
2343
|
+
# # Series: '' [bool]
|
2344
|
+
# # [
|
2345
|
+
# # false
|
2346
|
+
# # true
|
2347
|
+
# # false
|
2348
|
+
# # true
|
2349
|
+
# # false
|
2350
|
+
# # ]
|
578
2351
|
def peak_min
|
579
2352
|
Utils.wrap_s(_s.peak_min)
|
580
2353
|
end
|
581
2354
|
|
2355
|
+
# Count the number of unique values in this Series.
|
2356
|
+
#
|
2357
|
+
# @return [Integer]
|
2358
|
+
#
|
2359
|
+
# @example
|
2360
|
+
# s = Polars::Series.new("a", [1, 2, 2, 3])
|
2361
|
+
# s.n_unique
|
2362
|
+
# # => 3
|
582
2363
|
def n_unique
|
583
2364
|
_s.n_unique
|
584
2365
|
end
|
585
2366
|
|
586
|
-
#
|
587
|
-
#
|
2367
|
+
# Shrink Series memory usage.
|
2368
|
+
#
|
2369
|
+
# Shrinks the underlying array capacity to exactly fit the actual data.
|
2370
|
+
# (Note that this function does not change the Series data type).
|
2371
|
+
#
|
2372
|
+
# @return [Series]
|
2373
|
+
def shrink_to_fit(in_place: false)
|
2374
|
+
if in_place
|
2375
|
+
_s.shrink_to_fit
|
2376
|
+
self
|
2377
|
+
else
|
2378
|
+
series = clone
|
2379
|
+
series._s.shrink_to_fit
|
2380
|
+
series
|
2381
|
+
end
|
2382
|
+
end
|
588
2383
|
|
589
2384
|
# def _hash
|
590
2385
|
# end
|
591
2386
|
|
592
|
-
#
|
593
|
-
#
|
2387
|
+
# Reinterpret the underlying bits as a signed/unsigned integer.
|
2388
|
+
#
|
2389
|
+
# This operation is only allowed for 64-bit integers. For integers with fewer bits,
|
2390
|
+
# you can safely use the cast operation.
|
2391
|
+
#
|
2392
|
+
# @param signed [Boolean]
|
2393
|
+
# If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
|
2394
|
+
#
|
2395
|
+
# @return [Series]
|
2396
|
+
def reinterpret(signed: true)
|
2397
|
+
super
|
2398
|
+
end
|
594
2399
|
|
595
|
-
#
|
596
|
-
#
|
2400
|
+
# Interpolate intermediate values. The interpolation method is linear.
|
2401
|
+
#
|
2402
|
+
# @return [Series]
|
2403
|
+
#
|
2404
|
+
# @example
|
2405
|
+
# s = Polars::Series.new("a", [1, 2, nil, nil, 5])
|
2406
|
+
# s.interpolate
|
2407
|
+
# # =>
|
2408
|
+
# # shape: (5,)
|
2409
|
+
# # Series: 'a' [i64]
|
2410
|
+
# # [
|
2411
|
+
# # 1
|
2412
|
+
# # 2
|
2413
|
+
# # 3
|
2414
|
+
# # 4
|
2415
|
+
# # 5
|
2416
|
+
# # ]
|
2417
|
+
def interpolate
|
2418
|
+
super
|
2419
|
+
end
|
597
2420
|
|
598
|
-
#
|
599
|
-
#
|
2421
|
+
# Compute absolute values.
|
2422
|
+
#
|
2423
|
+
# @return [Series]
|
2424
|
+
def abs
|
2425
|
+
super
|
2426
|
+
end
|
600
2427
|
|
601
2428
|
# def rank
|
602
2429
|
# end
|
603
2430
|
|
604
|
-
#
|
605
|
-
#
|
2431
|
+
# Calculate the n-th discrete difference.
|
2432
|
+
#
|
2433
|
+
# @param n [Integer]
|
2434
|
+
# Number of slots to shift.
|
2435
|
+
# @param null_behavior ["ignore", "drop"]
|
2436
|
+
# How to handle null values.
|
2437
|
+
#
|
2438
|
+
# @return [Series]
|
2439
|
+
def diff(n: 1, null_behavior: "ignore")
|
2440
|
+
super
|
2441
|
+
end
|
606
2442
|
|
607
2443
|
# def pct_change
|
608
2444
|
# end
|
609
2445
|
|
610
|
-
#
|
611
|
-
#
|
612
|
-
|
613
|
-
#
|
614
|
-
#
|
615
|
-
|
616
|
-
#
|
617
|
-
#
|
2446
|
+
# Compute the sample skewness of a data set.
|
2447
|
+
#
|
2448
|
+
# For normally distributed data, the skewness should be about zero. For
|
2449
|
+
# unimodal continuous distributions, a skewness value greater than zero means
|
2450
|
+
# that there is more weight in the right tail of the distribution. The
|
2451
|
+
# function `skewtest` can be used to determine if the skewness value
|
2452
|
+
# is close enough to zero, statistically speaking.
|
2453
|
+
#
|
2454
|
+
# @param bias [Boolean]
|
2455
|
+
# If `false`, the calculations are corrected for statistical bias.
|
2456
|
+
#
|
2457
|
+
# @return [Float, nil]
|
2458
|
+
def skew(bias: true)
|
2459
|
+
_s.skew(bias)
|
2460
|
+
end
|
2461
|
+
|
2462
|
+
# Compute the kurtosis (Fisher or Pearson) of a dataset.
|
2463
|
+
#
|
2464
|
+
# Kurtosis is the fourth central moment divided by the square of the
|
2465
|
+
# variance. If Fisher's definition is used, then 3.0 is subtracted from
|
2466
|
+
# the result to give 0.0 for a normal distribution.
|
2467
|
+
# If bias is false, then the kurtosis is calculated using k statistics to
|
2468
|
+
# eliminate bias coming from biased moment estimators.
|
2469
|
+
#
|
2470
|
+
# @param fisher [Boolean]
|
2471
|
+
# If `true`, Fisher's definition is used (normal ==> 0.0). If `false`,
|
2472
|
+
# Pearson's definition is used (normal ==> 3.0).
|
2473
|
+
# @param bias [Boolean]
|
2474
|
+
# If `false`, the calculations are corrected for statistical bias.
|
2475
|
+
#
|
2476
|
+
# @return [Float, nil]
|
2477
|
+
def kurtosis(fisher: true, bias: true)
|
2478
|
+
_s.kurtosis(fisher, bias)
|
2479
|
+
end
|
2480
|
+
|
2481
|
+
# Clip (limit) the values in an array to a `min` and `max` boundary.
|
2482
|
+
#
|
2483
|
+
# Only works for numerical types.
|
2484
|
+
#
|
2485
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
2486
|
+
# expression. See {#when} for more information.
|
2487
|
+
#
|
2488
|
+
# @param min_val [Numeric]
|
2489
|
+
# Minimum value.
|
2490
|
+
# @param max_val [Numeric]
|
2491
|
+
# Maximum value.
|
2492
|
+
#
|
2493
|
+
# @return [Series]
|
2494
|
+
#
|
2495
|
+
# @example
|
2496
|
+
# s = Polars::Series.new("foo", [-50, 5, nil, 50])
|
2497
|
+
# s.clip(1, 10)
|
2498
|
+
# # =>
|
2499
|
+
# # shape: (4,)
|
2500
|
+
# # Series: 'foo' [i64]
|
2501
|
+
# # [
|
2502
|
+
# # 1
|
2503
|
+
# # 5
|
2504
|
+
# # null
|
2505
|
+
# # 10
|
2506
|
+
# # ]
|
2507
|
+
def clip(min_val, max_val)
|
2508
|
+
super
|
2509
|
+
end
|
618
2510
|
|
619
|
-
#
|
620
|
-
#
|
2511
|
+
# Clip (limit) the values in an array to a `min` boundary.
|
2512
|
+
#
|
2513
|
+
# Only works for numerical types.
|
2514
|
+
#
|
2515
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
2516
|
+
# expression. See {#when} for more information.
|
2517
|
+
#
|
2518
|
+
# @param min_val [Numeric]
|
2519
|
+
# Minimum value.
|
2520
|
+
#
|
2521
|
+
# @return [Series]
|
2522
|
+
def clip_min(min_val)
|
2523
|
+
super
|
2524
|
+
end
|
621
2525
|
|
622
|
-
#
|
623
|
-
#
|
2526
|
+
# Clip (limit) the values in an array to a `max` boundary.
|
2527
|
+
#
|
2528
|
+
# Only works for numerical types.
|
2529
|
+
#
|
2530
|
+
# If you want to clip other dtypes, consider writing a "when, then, otherwise"
|
2531
|
+
# expression. See {#when} for more information.
|
2532
|
+
#
|
2533
|
+
# @param max_val [Numeric]
|
2534
|
+
# Maximum value.
|
2535
|
+
#
|
2536
|
+
# @return [Series]
|
2537
|
+
def clip_max(max_val)
|
2538
|
+
super
|
2539
|
+
end
|
624
2540
|
|
625
|
-
#
|
626
|
-
#
|
2541
|
+
# Reshape this Series to a flat Series or a Series of Lists.
|
2542
|
+
#
|
2543
|
+
# @param dims [Array]
|
2544
|
+
# Array of the dimension sizes. If a -1 is used in any of the dimensions, that
|
2545
|
+
# dimension is inferred.
|
2546
|
+
#
|
2547
|
+
# @return [Series]
|
2548
|
+
def reshape(dims)
|
2549
|
+
super
|
2550
|
+
end
|
627
2551
|
|
628
|
-
#
|
629
|
-
#
|
2552
|
+
# Shuffle the contents of this Series.
|
2553
|
+
#
|
2554
|
+
# @param seed [Integer, nil]
|
2555
|
+
# Seed for the random number generator.
|
2556
|
+
#
|
2557
|
+
# @return [Series]
|
2558
|
+
#
|
2559
|
+
# @example
|
2560
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2561
|
+
# s.shuffle(seed: 1)
|
2562
|
+
# # =>
|
2563
|
+
# # shape: (3,)
|
2564
|
+
# # Series: 'a' [i64]
|
2565
|
+
# # [
|
2566
|
+
# # 2
|
2567
|
+
# # 1
|
2568
|
+
# # 3
|
2569
|
+
# # ]
|
2570
|
+
def shuffle(seed: nil)
|
2571
|
+
super
|
2572
|
+
end
|
630
2573
|
|
631
2574
|
# def ewm_mean
|
632
2575
|
# end
|
@@ -637,18 +2580,70 @@ module Polars
|
|
637
2580
|
# def ewm_var
|
638
2581
|
# end
|
639
2582
|
|
640
|
-
#
|
641
|
-
#
|
2583
|
+
# Extend the Series with given number of values.
|
2584
|
+
#
|
2585
|
+
# @param value [Object]
|
2586
|
+
# The value to extend the Series with. This value may be `nil` to fill with
|
2587
|
+
# nulls.
|
2588
|
+
# @param n [Integer]
|
2589
|
+
# The number of values to extend.
|
2590
|
+
#
|
2591
|
+
# @return [Series]
|
2592
|
+
#
|
2593
|
+
# @example
|
2594
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2595
|
+
# s.extend_constant(99, 2)
|
2596
|
+
# # =>
|
2597
|
+
# # shape: (5,)
|
2598
|
+
# # Series: 'a' [i64]
|
2599
|
+
# # [
|
2600
|
+
# # 1
|
2601
|
+
# # 2
|
2602
|
+
# # 3
|
2603
|
+
# # 99
|
2604
|
+
# # 99
|
2605
|
+
# # ]
|
2606
|
+
def extend_constant(value, n)
|
2607
|
+
super
|
2608
|
+
end
|
642
2609
|
|
2610
|
+
# Flags the Series as sorted.
|
2611
|
+
#
|
2612
|
+
# Enables downstream code to use fast paths for sorted arrays.
|
2613
|
+
#
|
2614
|
+
# @param reverse [Boolean]
|
2615
|
+
# If the Series order is reversed, e.g. descending.
|
2616
|
+
#
|
2617
|
+
# @return [Series]
|
2618
|
+
#
|
2619
|
+
# @note
|
2620
|
+
# This can lead to incorrect results if this Series is not sorted!!
|
2621
|
+
# Use with care!
|
2622
|
+
#
|
2623
|
+
# @example
|
2624
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2625
|
+
# s.set_sorted.max
|
2626
|
+
# # => 3
|
643
2627
|
def set_sorted(reverse: false)
|
644
2628
|
Utils.wrap_s(_s.set_sorted(reverse))
|
645
2629
|
end
|
646
2630
|
|
647
|
-
#
|
648
|
-
#
|
2631
|
+
# Create a new Series filled with values from the given index.
|
2632
|
+
#
|
2633
|
+
# @return [Series]
|
2634
|
+
def new_from_index(index, length)
|
2635
|
+
Utils.wrap_s(_s.new_from_index(index, length))
|
2636
|
+
end
|
649
2637
|
|
650
|
-
#
|
651
|
-
#
|
2638
|
+
# Shrink numeric columns to the minimal required datatype.
|
2639
|
+
#
|
2640
|
+
# Shrink to the dtype needed to fit the extrema of this Series.
|
2641
|
+
# This can be used to reduce memory pressure.
|
2642
|
+
#
|
2643
|
+
# @return [Series]
|
2644
|
+
def shrink_dtype
|
2645
|
+
super
|
2646
|
+
end
|
652
2647
|
|
653
2648
|
# def arr
|
654
2649
|
# end
|
@@ -672,8 +2667,47 @@ module Polars
|
|
672
2667
|
self._s = _s._clone
|
673
2668
|
end
|
674
2669
|
|
2670
|
+
def coerce(other)
|
2671
|
+
if other.is_a?(Numeric)
|
2672
|
+
# TODO improve
|
2673
|
+
series = to_frame.select(Polars.lit(other)).to_series
|
2674
|
+
[series, self]
|
2675
|
+
else
|
2676
|
+
raise TypeError, "#{self.class} can't be coerced into #{other.class}"
|
2677
|
+
end
|
2678
|
+
end
|
2679
|
+
|
2680
|
+
def _comp(other, op)
|
2681
|
+
if other.is_a?(Series)
|
2682
|
+
return Utils.wrap_s(_s.send(op, other._s))
|
2683
|
+
end
|
2684
|
+
|
2685
|
+
if dtype == :str
|
2686
|
+
raise Todo
|
2687
|
+
end
|
2688
|
+
Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
|
2689
|
+
end
|
2690
|
+
|
2691
|
+
def _arithmetic(other, op)
|
2692
|
+
if other.is_a?(Expr)
|
2693
|
+
other = to_frame.select(other).to_series
|
2694
|
+
end
|
2695
|
+
if other.is_a?(Series)
|
2696
|
+
return Utils.wrap_s(_s.send(op, other._s))
|
2697
|
+
end
|
2698
|
+
|
2699
|
+
raise Todo
|
2700
|
+
end
|
2701
|
+
|
2702
|
+
def series_to_rbseries(name, values)
|
2703
|
+
# should not be in-place?
|
2704
|
+
values.rename(name, in_place: true)
|
2705
|
+
values._s
|
2706
|
+
end
|
2707
|
+
|
675
2708
|
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
|
676
2709
|
ruby_dtype = nil
|
2710
|
+
nested_dtype = nil
|
677
2711
|
|
678
2712
|
if (values.nil? || values.empty?) && dtype.nil?
|
679
2713
|
if dtype_if_empty
|
@@ -682,7 +2716,7 @@ module Polars
|
|
682
2716
|
dtype = dtype_if_empty
|
683
2717
|
else
|
684
2718
|
# default to Float32 type
|
685
|
-
dtype =
|
2719
|
+
dtype = :f32
|
686
2720
|
end
|
687
2721
|
end
|
688
2722
|
|
@@ -691,8 +2725,7 @@ module Polars
|
|
691
2725
|
rb_temporal_types << DateTime if defined?(DateTime)
|
692
2726
|
rb_temporal_types << Time if defined?(Time)
|
693
2727
|
|
694
|
-
|
695
|
-
value = values.find { |v| !v.nil? }
|
2728
|
+
value = _get_first_non_none(values)
|
696
2729
|
|
697
2730
|
if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
|
698
2731
|
constructor = polars_type_to_constructor(dtype)
|
@@ -716,7 +2749,22 @@ module Polars
|
|
716
2749
|
# dtype = rb_type_to_dtype(dtype)
|
717
2750
|
# end
|
718
2751
|
|
719
|
-
|
2752
|
+
if ruby_dtype == Date
|
2753
|
+
RbSeries.new_opt_date(name, values, strict)
|
2754
|
+
else
|
2755
|
+
raise Todo
|
2756
|
+
end
|
2757
|
+
elsif ruby_dtype == Array
|
2758
|
+
if nested_dtype.nil?
|
2759
|
+
nested_value = _get_first_non_none(value)
|
2760
|
+
nested_dtype = nested_value.nil? ? Float : nested_value.class
|
2761
|
+
end
|
2762
|
+
|
2763
|
+
if nested_dtype == Array
|
2764
|
+
raise Todo
|
2765
|
+
end
|
2766
|
+
|
2767
|
+
raise Todo
|
720
2768
|
else
|
721
2769
|
constructor = rb_type_to_constructor(value.class)
|
722
2770
|
constructor.call(name, values, strict)
|
@@ -759,5 +2807,9 @@ module Polars
|
|
759
2807
|
# RbSeries.method(:new_object)
|
760
2808
|
raise ArgumentError, "Cannot determine type"
|
761
2809
|
end
|
2810
|
+
|
2811
|
+
def _get_first_non_none(values)
|
2812
|
+
values.find { |v| !v.nil? }
|
2813
|
+
end
|
762
2814
|
end
|
763
2815
|
end
|