red_amber 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +39 -20
- data/.yardopts +2 -0
- data/CHANGELOG.md +113 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +25 -26
- data/benchmark/basic.yml +2 -2
- data/benchmark/combine.yml +2 -2
- data/benchmark/dataframe.yml +2 -2
- data/benchmark/group.yml +2 -2
- data/benchmark/reshape.yml +2 -2
- data/benchmark/vector.yml +3 -0
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +429 -75
- data/lib/red_amber/data_frame_combinable.rb +516 -66
- data/lib/red_amber/data_frame_displayable.rb +244 -14
- data/lib/red_amber/data_frame_indexable.rb +121 -18
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +622 -66
- data/lib/red_amber/data_frame_variable_operation.rb +446 -34
- data/lib/red_amber/group.rb +187 -22
- data/lib/red_amber/helper.rb +70 -10
- data/lib/red_amber/refinements.rb +12 -5
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +385 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +217 -12
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
@@ -1,40 +1,133 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-in for the class DataFrame
|
5
5
|
module DataFrameSelectable
|
6
6
|
# Array, Arrow::Array and Arrow::ChunkedArray are refined
|
7
7
|
using RefineArray
|
8
8
|
using RefineArrayLike
|
9
9
|
|
10
|
+
# rubocop:disable Layout/LineLength
|
11
|
+
|
10
12
|
# Select variables or records.
|
11
13
|
#
|
12
14
|
# @overload [](key)
|
13
|
-
#
|
15
|
+
# Select single variable (column) and return as a Vector.
|
14
16
|
#
|
15
|
-
# @param key [Symbol, String]
|
16
|
-
#
|
17
|
+
# @param key [Symbol, String]
|
18
|
+
# key name to select.
|
19
|
+
# @return [Vector]
|
20
|
+
# selected variable as a Vector.
|
17
21
|
# @note DataFrame.v(key) is faster to create Vector from a variable.
|
22
|
+
# @example Select a column and return Vector
|
23
|
+
# penguins
|
24
|
+
#
|
25
|
+
# # =>
|
26
|
+
# #<RedAmber::DataFrame : 344 x 8 Vectors, 0x00000000000039bc>
|
27
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
28
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
29
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
30
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
31
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
32
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
33
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
34
|
+
# : : : : : : ... :
|
35
|
+
# 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
36
|
+
# 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
37
|
+
# 343 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
38
|
+
#
|
39
|
+
# penguins[:bill_length_mm]
|
40
|
+
#
|
41
|
+
# # =>
|
42
|
+
# #<RedAmber::Vector(:double, size=344):0x00000000000104dc>
|
43
|
+
# [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
|
18
44
|
#
|
19
45
|
# @overload [](keys)
|
20
|
-
#
|
46
|
+
# Select variables and return a DataFrame.
|
21
47
|
#
|
22
48
|
# @param keys [<Symbol, String>] key names to select.
|
23
|
-
# @return [DataFrame]
|
49
|
+
# @return [DataFrame]
|
50
|
+
# selected variables as a DataFrame.
|
51
|
+
# @example Select columns
|
52
|
+
# penguins[:island, :bill_length_mm]
|
53
|
+
#
|
54
|
+
# # =>
|
55
|
+
# #<RedAmber::DataFrame : 344 x 2 Vectors, 0x00000000000104f0>
|
56
|
+
# island bill_length_mm
|
57
|
+
# <string> <double>
|
58
|
+
# 0 Torgersen 39.1
|
59
|
+
# 1 Torgersen 39.5
|
60
|
+
# 2 Torgersen 40.3
|
61
|
+
# 3 Torgersen (nil)
|
62
|
+
# 4 Torgersen 36.7
|
63
|
+
# : : :
|
64
|
+
# 341 Biscoe 50.4
|
65
|
+
# 342 Biscoe 45.2
|
66
|
+
# 343 Biscoe 49.9
|
24
67
|
#
|
25
68
|
# @overload [](index)
|
26
|
-
#
|
69
|
+
# Select a record and return a DataFrame.
|
27
70
|
#
|
28
71
|
# @param index [Integer, Float, Range<Integer>, Vector, Arrow::Array]
|
29
72
|
# index of a row to select.
|
30
|
-
# @return [DataFrame]
|
73
|
+
# @return [DataFrame]
|
74
|
+
# selected record as a DataFrame.
|
75
|
+
# @example Select a row
|
76
|
+
# penguins[0]
|
77
|
+
#
|
78
|
+
# # =>
|
79
|
+
# #<RedAmber::DataFrame : 1 x 8 Vectors, 0x0000000000010504>
|
80
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
81
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
82
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
31
83
|
#
|
32
84
|
# @overload [](indices)
|
33
|
-
#
|
85
|
+
# Select records by indices and return a DataFrame.
|
34
86
|
#
|
35
|
-
# @param indices [<Indeger
|
87
|
+
# @param indices [<Integer>, <Float>, Range<Integer>, Vector, Arrow::Array]
|
36
88
|
# indices of rows to select.
|
37
|
-
# @return [DataFrame]
|
89
|
+
# @return [DataFrame]
|
90
|
+
# selected records as a DataFrame.
|
91
|
+
# @example Select rows by indices
|
92
|
+
# penguins[0..100]
|
93
|
+
#
|
94
|
+
# # =>
|
95
|
+
# #<RedAmber::DataFrame : 101 x 8 Vectors, 0x00000000000105e0>
|
96
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
97
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
98
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
99
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
100
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
101
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
102
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
103
|
+
# : : : : : : ... :
|
104
|
+
# 98 Adelie Dream 33.1 16.1 178 ... 2008
|
105
|
+
# 99 Adelie Dream 43.2 18.5 192 ... 2008
|
106
|
+
# 100 Adelie Biscoe 35.0 17.9 192 ... 2009
|
107
|
+
#
|
108
|
+
# @overload [](booleans)
|
109
|
+
# Select records by booleans and return a DataFrame.
|
110
|
+
#
|
111
|
+
# @param booleans [Array<true, false, nil>, Vector, Arrow::Array]
|
112
|
+
# booleans of rows to select.
|
113
|
+
# @return [DataFrame]
|
114
|
+
# selected records as a DataFrame.
|
115
|
+
# @example Select rows by booleans
|
116
|
+
# penguins[penguins.species == 'Adelie']
|
117
|
+
#
|
118
|
+
# # =>
|
119
|
+
# #<RedAmber::DataFrame : 152 x 8 Vectors, 0x0000000000010658>
|
120
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
121
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
122
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
123
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
124
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
125
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
126
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
127
|
+
# : : : : : : ... :
|
128
|
+
# 149 Adelie Dream 37.8 18.1 193 ... 2009
|
129
|
+
# 150 Adelie Dream 36.0 17.1 187 ... 2009
|
130
|
+
# 151 Adelie Dream 41.5 18.5 201 ... 2009
|
38
131
|
#
|
39
132
|
def [](*args)
|
40
133
|
raise DataFrameArgumentError, 'self is an empty dataframe' if empty?
|
@@ -52,10 +145,10 @@ module RedAmber
|
|
52
145
|
arrow_array = aa
|
53
146
|
else
|
54
147
|
a = parse_args(args, size)
|
55
|
-
return select_variables_by_keys(a) if a.
|
56
|
-
return take(normalize_indices(Arrow::Array.new(a))) if a.
|
148
|
+
return select_variables_by_keys(a) if a.symbol?
|
149
|
+
return take(normalize_indices(Arrow::Array.new(a))) if a.integer?
|
57
150
|
return remove_all_values if a.compact.empty?
|
58
|
-
return filter_by_array(Arrow::BooleanArray.new(a)) if a.
|
151
|
+
return filter_by_array(Arrow::BooleanArray.new(a)) if a.boolean?
|
59
152
|
|
60
153
|
raise DataFrameArgumentError, "invalid arguments: #{args}"
|
61
154
|
end
|
@@ -64,12 +157,25 @@ module RedAmber
|
|
64
157
|
return filter_by_array(arrow_array) if arrow_array.boolean?
|
65
158
|
|
66
159
|
a = arrow_array.to_a
|
67
|
-
return select_variables_by_keys(a) if a.
|
160
|
+
return select_variables_by_keys(a) if a.symbol_or_string?
|
68
161
|
|
69
162
|
raise DataFrameArgumentError, "invalid arguments: #{args}"
|
70
163
|
end
|
71
164
|
|
72
|
-
# Select a variable by
|
165
|
+
# Select a variable by String or Symbol and return as a Vector.
|
166
|
+
#
|
167
|
+
# @param key [Symbol, String]
|
168
|
+
# key name to select.
|
169
|
+
# @return [Vector]
|
170
|
+
# selected variable as a Vector.
|
171
|
+
# @note #v(key) is faster than #[](key).
|
172
|
+
# @example Select a column and return Vector
|
173
|
+
# penguins.v(:bill_length_mm)
|
174
|
+
#
|
175
|
+
# # =>
|
176
|
+
# #<RedAmber::Vector(:double, size=344):0x000000000000f140>
|
177
|
+
# [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
|
178
|
+
#
|
73
179
|
def v(key)
|
74
180
|
unless key.is_a?(Symbol) || key.is_a?(String)
|
75
181
|
raise DataFrameArgumentError, "Key is not a Symbol or a String: [#{key}]"
|
@@ -82,30 +188,168 @@ module RedAmber
|
|
82
188
|
# Select records to create a DataFrame.
|
83
189
|
#
|
84
190
|
# @overload slice(row)
|
85
|
-
#
|
191
|
+
# Select a record and return a DataFrame.
|
86
192
|
#
|
87
|
-
# @param row [Indeger, Float
|
88
|
-
# a row index to select.
|
89
|
-
# @yield [self] gives self to the block.
|
90
|
-
# @note The block is evaluated within the context of self.
|
91
|
-
# It is accessable to self's instance variables and private methods.
|
92
|
-
# @yieldreturn [Indeger, Float, Range<Integer>, Vector, Arrow::Array]
|
193
|
+
# @param row [Integer, Float]
|
93
194
|
# a row index to select.
|
94
|
-
# @return [DataFrame]
|
195
|
+
# @return [DataFrame]
|
196
|
+
# selected records as a DataFrame.
|
197
|
+
# @example Select a row
|
198
|
+
# penguins
|
199
|
+
#
|
200
|
+
# # =>
|
201
|
+
# #<RedAmber::DataFrame : 344 x 8 Vectors, 0x00000000000039bc>
|
202
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
203
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
204
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
205
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
206
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
207
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
208
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
209
|
+
# : : : : : : ... :
|
210
|
+
# 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
211
|
+
# 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
212
|
+
# 343 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
213
|
+
# penguins.slice(2)
|
214
|
+
#
|
215
|
+
# # =>
|
216
|
+
# #<RedAmber::DataFrame : 1 x 8 Vectors, 0x00000000000039d0>
|
217
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
218
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
219
|
+
# 0 Adelie Torgersen 40.3 18.0 195 ... 2007
|
95
220
|
#
|
96
221
|
# @overload slice(rows)
|
97
|
-
#
|
222
|
+
# Select records and return a DataFrame.
|
98
223
|
# - Duplicated selection is acceptable. The same record will be returned.
|
99
224
|
# - The order of records will be the same as specified indices.
|
100
225
|
#
|
101
|
-
# @param rows [Integer
|
226
|
+
# @param rows [<Integer>, <Float>, Range<Integer>, Vector, Arrow::Array]
|
102
227
|
# row indices to select.
|
103
|
-
# @
|
104
|
-
#
|
105
|
-
#
|
106
|
-
#
|
228
|
+
# @return [DataFrame]
|
229
|
+
# selected records as a DataFrame.
|
230
|
+
# @example Select rows
|
231
|
+
# penguins.slice(300..-1)
|
232
|
+
#
|
233
|
+
# # =>
|
234
|
+
# #<RedAmber::DataFrame : 44 x 8 Vectors, 0x000000000000fb54>
|
235
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
236
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
237
|
+
# 0 Gentoo Biscoe 49.1 14.5 212 ... 2009
|
238
|
+
# 1 Gentoo Biscoe 52.5 15.6 221 ... 2009
|
239
|
+
# 2 Gentoo Biscoe 47.4 14.6 212 ... 2009
|
240
|
+
# 3 Gentoo Biscoe 50.0 15.9 224 ... 2009
|
241
|
+
# 4 Gentoo Biscoe 44.9 13.8 212 ... 2009
|
242
|
+
# : : : : : : ... :
|
243
|
+
# 41 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
244
|
+
# 42 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
245
|
+
# 43 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
246
|
+
#
|
247
|
+
# @overload slice(enumerator)
|
248
|
+
# Select records and return a DataFrame.
|
249
|
+
# - Duplicated selection is acceptable. The same record will be returned.
|
250
|
+
# - The order of records will be the same as specified indices.
|
251
|
+
#
|
252
|
+
# @param enumerator [Enumerator]
|
253
|
+
# an enumerator which returns row indices to select.
|
254
|
+
# @return [DataFrame]
|
255
|
+
# selected records as a DataFrame.
|
256
|
+
# @example Select rows by Enumerator.
|
257
|
+
# penguins.assign_left(index: penguins.indices) # 0.2.0 feature
|
258
|
+
# .slice(0.step(by: 10, to: 340))
|
259
|
+
#
|
260
|
+
# # =>
|
261
|
+
# #<RedAmber::DataFrame : 35 x 9 Vectors, 0x000000000000f2e4>
|
262
|
+
# index species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
263
|
+
# <uint16> <string> <string> <double> <double> <uint8> ... <uint16>
|
264
|
+
# 0 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
265
|
+
# 1 10 Adelie Torgersen 37.8 17.1 186 ... 2007
|
266
|
+
# 2 20 Adelie Biscoe 37.8 18.3 174 ... 2007
|
267
|
+
# 3 30 Adelie Dream 39.5 16.7 178 ... 2007
|
268
|
+
# 4 40 Adelie Dream 36.5 18.0 182 ... 2007
|
269
|
+
# : : : : : : : ... :
|
270
|
+
# 32 320 Gentoo Biscoe 48.5 15.0 219 ... 2009
|
271
|
+
# 33 330 Gentoo Biscoe 50.5 15.2 216 ... 2009
|
272
|
+
# 34 340 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
273
|
+
#
|
274
|
+
# @overload slice
|
275
|
+
# Select records by indices with block and return a DataFrame.
|
276
|
+
# - Duplicated selection is acceptable. The same record will be returned.
|
277
|
+
# - The order of records will be the same as specified indices.
|
278
|
+
#
|
279
|
+
# @yieldparam self [DataFrame]
|
280
|
+
# gives self to the block.
|
281
|
+
# The block is evaluated within the context of self.
|
282
|
+
# @yieldreturn [<Integer>, <Float>, Range<Integer>, Vector, Arrow::Array, Enumerator]
|
107
283
|
# row indices to select.
|
108
|
-
# @return [DataFrame]
|
284
|
+
# @return [DataFrame]
|
285
|
+
# selected records as a DataFrame.
|
286
|
+
# @example Select rows by block
|
287
|
+
# penguins.assign_left(index: penguins.indices) # 0.2.0 feature
|
288
|
+
# .slice { 0.step(by: 100, to: 300).map { |i| i..(i+1) } }
|
289
|
+
#
|
290
|
+
# # =>
|
291
|
+
# #<RedAmber::DataFrame : 8 x 9 Vectors, 0x000000000000f3ac>
|
292
|
+
# index species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
293
|
+
# <uint16> <string> <string> <double> <double> <uint8> ... <uint16>
|
294
|
+
# 0 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
295
|
+
# 1 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
296
|
+
# 2 100 Adelie Biscoe 35.0 17.9 192 ... 2009
|
297
|
+
# 3 101 Adelie Biscoe 41.0 20.0 203 ... 2009
|
298
|
+
# 4 200 Chinstrap Dream 51.5 18.7 187 ... 2009
|
299
|
+
# 5 201 Chinstrap Dream 49.8 17.3 198 ... 2009
|
300
|
+
# 6 300 Gentoo Biscoe 49.1 14.5 212 ... 2009
|
301
|
+
# 7 301 Gentoo Biscoe 52.5 15.6 221 ... 2009
|
302
|
+
#
|
303
|
+
# @overload slice(booleans)
|
304
|
+
# Select records by filtering with booleans and return a DataFrame.
|
305
|
+
#
|
306
|
+
# @param booleans [<Boolean, nil>, Vector, Arrow::Array]
|
307
|
+
# a boolean filter.
|
308
|
+
# @return [DataFrame]
|
309
|
+
# filtered records as a DataFrame.
|
310
|
+
# @example Select rows by boolean filter
|
311
|
+
# penguins.slice(penguins[:bill_length_mm] > 50)
|
312
|
+
#
|
313
|
+
# # =>
|
314
|
+
# #<RedAmber::DataFrame : 52 x 8 Vectors, 0x000000000000fd98>
|
315
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
316
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
317
|
+
# 0 Chinstrap Dream 51.3 19.2 193 ... 2007
|
318
|
+
# 1 Chinstrap Dream 52.7 19.8 197 ... 2007
|
319
|
+
# 2 Chinstrap Dream 51.3 18.2 197 ... 2007
|
320
|
+
# 3 Chinstrap Dream 51.3 19.9 198 ... 2007
|
321
|
+
# 4 Chinstrap Dream 51.7 20.3 194 ... 2007
|
322
|
+
# : : : : : : ... :
|
323
|
+
# 49 Gentoo Biscoe 51.5 16.3 230 ... 2009
|
324
|
+
# 50 Gentoo Biscoe 55.1 16.0 230 ... 2009
|
325
|
+
# 51 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
326
|
+
#
|
327
|
+
# @overload slice
|
328
|
+
# Select records by filtering with block and return a DataFrame.
|
329
|
+
#
|
330
|
+
# @yieldparam self [DataFrame]
|
331
|
+
# gives self to the block.
|
332
|
+
# The block is evaluated within the context of self.
|
333
|
+
# @yieldreturn [<Boolean, nil>, Vector, Arrow::Array]
|
334
|
+
# a boolean filter. `Vector` or `Arrow::Array` must be boolean type.
|
335
|
+
# @return [DataFrame]
|
336
|
+
# filtered records as a DataFrame.
|
337
|
+
# @example Select rows by booleans from block
|
338
|
+
# penguins.slice { indices.map(&:even?) }
|
339
|
+
#
|
340
|
+
# # =>
|
341
|
+
# #<RedAmber::DataFrame : 172 x 8 Vectors, 0x000000000000ff78>
|
342
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
343
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
344
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
345
|
+
# 1 Adelie Torgersen 40.3 18.0 195 ... 2007
|
346
|
+
# 2 Adelie Torgersen 36.7 19.3 193 ... 2007
|
347
|
+
# 3 Adelie Torgersen 38.9 17.8 181 ... 2007
|
348
|
+
# 4 Adelie Torgersen 34.1 18.1 193 ... 2007
|
349
|
+
# : : : : : : ... :
|
350
|
+
# 169 Gentoo Biscoe 47.2 13.7 214 ... 2009
|
351
|
+
# 170 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
352
|
+
# 171 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
109
353
|
#
|
110
354
|
def slice(*args, &block)
|
111
355
|
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
@@ -142,6 +386,73 @@ module RedAmber
|
|
142
386
|
end
|
143
387
|
end
|
144
388
|
|
389
|
+
# Select records by a column specified by a key
|
390
|
+
# and corresponding record with a block.
|
391
|
+
#
|
392
|
+
# @overload slice_by(key)
|
393
|
+
# Select records by elements.
|
394
|
+
#
|
395
|
+
# @param key [Symbol, String]
|
396
|
+
# a key to select column.
|
397
|
+
# @param keep_key [true, false]
|
398
|
+
# preserve column specified by key in the result if true.
|
399
|
+
# @yieldparam self [DataFrame]
|
400
|
+
# gives self to the block.
|
401
|
+
# The block is evaluated within the context of self.
|
402
|
+
# @yieldreturn [<elements>]
|
403
|
+
# array of elements to select.
|
404
|
+
# @return [DataFrame]
|
405
|
+
# selected records as a DataFrame.
|
406
|
+
# @example Select records by elements
|
407
|
+
# df
|
408
|
+
#
|
409
|
+
# # =>
|
410
|
+
# #<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000069e60>
|
411
|
+
# index float string
|
412
|
+
# <uint8> <double> <string>
|
413
|
+
# 0 0 0.0 A
|
414
|
+
# 1 1 1.1 B
|
415
|
+
# 2 2 2.2 C
|
416
|
+
# 3 3 NaN D
|
417
|
+
# 4 (nil) (nil) (nil)
|
418
|
+
#
|
419
|
+
# df.slice_by(:string) { ["A", "C"] }
|
420
|
+
#
|
421
|
+
# # =>
|
422
|
+
# #<RedAmber::DataFrame : 2 x 2 Vectors, 0x000000000001b1ac>
|
423
|
+
# index float
|
424
|
+
# <uint8> <double>
|
425
|
+
# 0 0 0.0
|
426
|
+
# 1 2 2.2
|
427
|
+
#
|
428
|
+
# @overload slice_by(key)
|
429
|
+
# Select records by elements range.
|
430
|
+
#
|
431
|
+
# @param key [Symbol, String]
|
432
|
+
# a key to select column.
|
433
|
+
# @param keep_key [true, false]
|
434
|
+
# preserve column specified by key in the result if true.
|
435
|
+
# @yieldparam self [DataFrame]
|
436
|
+
# gives self to the block.
|
437
|
+
# The block is evaluated within the context of self.
|
438
|
+
# @yieldreturn [Range]
|
439
|
+
# specifies position of elements at the start and the end and
|
440
|
+
# select records between them.
|
441
|
+
# @return [DataFrame]
|
442
|
+
# selected records as a DataFrame.
|
443
|
+
# @example Select records by elements range
|
444
|
+
# df.slice_by(:string) { "A".."C" }
|
445
|
+
#
|
446
|
+
# # =>
|
447
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000069668>
|
448
|
+
# index float
|
449
|
+
# <uint8> <double>
|
450
|
+
# 0 0 0.0
|
451
|
+
# 1 1 1.1
|
452
|
+
# 2 2 2.2
|
453
|
+
#
|
454
|
+
# @since 0.2.1
|
455
|
+
#
|
145
456
|
def slice_by(key, keep_key: false, &block)
|
146
457
|
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
147
458
|
raise DataFrameArgumentError, 'No block given' unless block
|
@@ -183,33 +494,242 @@ module RedAmber
|
|
183
494
|
keep_key ? taken : taken.drop(key)
|
184
495
|
end
|
185
496
|
|
497
|
+
# Select records by filtering with booleans to create a DataFrame.
|
498
|
+
#
|
499
|
+
# @overload filter(booleans)
|
500
|
+
# Select records by filtering with booleans and return a DataFrame.
|
501
|
+
#
|
502
|
+
# @param booleans [<Boolean, nil>, Vector, Arrow::Array]
|
503
|
+
# a boolean filter.
|
504
|
+
# @return [DataFrame]
|
505
|
+
# filtered records as a DataFrame.
|
506
|
+
# @example Filter by boolean Vector
|
507
|
+
# penguins
|
508
|
+
#
|
509
|
+
# # =>
|
510
|
+
# #<RedAmber::DataFrame : 344 x 8 Vectors, 0x00000000000039bc>
|
511
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
512
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
513
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
514
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
515
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
516
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
517
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
518
|
+
# : : : : : : ... :
|
519
|
+
# 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
520
|
+
# 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
521
|
+
# 343 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
522
|
+
#
|
523
|
+
# penguins.filter(penguins.bill_length_mm < 50)
|
524
|
+
#
|
525
|
+
# # =>
|
526
|
+
# #<RedAmber::DataFrame : 285 x 8 Vectors, 0x00000000000101a8>
|
527
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
528
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
529
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
530
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
531
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
532
|
+
# 3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
533
|
+
# 4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
534
|
+
# : : : : : : ... :
|
535
|
+
# 282 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
536
|
+
# 283 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
537
|
+
# 284 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
538
|
+
#
|
539
|
+
# @overload filter
|
540
|
+
# Select records by filtering with block and return a DataFrame.
|
541
|
+
#
|
542
|
+
# @yieldparam self [DataFrame]
|
543
|
+
# gives self to the block.
|
544
|
+
# The block is evaluated within the context of self.
|
545
|
+
# @yieldreturn [<Boolean, nil>, Vector, Arrow::Array]
|
546
|
+
# a boolean filter. `Vector` or `Arrow::Array` must be boolean type.
|
547
|
+
# @return [DataFrame]
|
548
|
+
# filtered records as a DataFrame.
|
549
|
+
# @example Filter by boolean Vector
|
550
|
+
# penguins.filter { bill_length_mm < 50 }
|
551
|
+
#
|
552
|
+
# # =>
|
553
|
+
# #<RedAmber::DataFrame : 285 x 8 Vectors, 0x00000000000101bc>
|
554
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
555
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
556
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
557
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
558
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
559
|
+
# 3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
560
|
+
# 4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
561
|
+
# : : : : : : ... :
|
562
|
+
# 282 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
563
|
+
# 283 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
564
|
+
# 284 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
565
|
+
#
|
566
|
+
def filter(*booleans, &block)
|
567
|
+
booleans.flatten!
|
568
|
+
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
569
|
+
|
570
|
+
if block
|
571
|
+
unless booleans.empty?
|
572
|
+
raise DataFrameArgumentError, 'Must not specify both arguments and block.'
|
573
|
+
end
|
574
|
+
|
575
|
+
booleans = [instance_eval(&block)]
|
576
|
+
end
|
577
|
+
|
578
|
+
case booleans
|
579
|
+
in [] | [[]]
|
580
|
+
return remove_all_values
|
581
|
+
in [Vector => v] if v.boolean?
|
582
|
+
filter_by_array(v.data)
|
583
|
+
in [Arrow::ChunkedArray => ca] if ca.boolean?
|
584
|
+
filter_by_array(ca)
|
585
|
+
in [Arrow::BooleanArray => b]
|
586
|
+
filter_by_array(b)
|
587
|
+
else
|
588
|
+
a = Arrow::Array.new(parse_args(booleans, size))
|
589
|
+
unless a.boolean?
|
590
|
+
raise DataFrameArgumentError, "not a boolean filter: #{booleans}"
|
591
|
+
end
|
592
|
+
|
593
|
+
filter_by_array(a)
|
594
|
+
end
|
595
|
+
end
|
596
|
+
|
186
597
|
# Select records and remove them to create a DataFrame of the remaining records.
|
187
598
|
#
|
188
599
|
# @overload remove(row)
|
189
|
-
#
|
600
|
+
# Select a record and remove it to create a DataFrame of the remaining records.
|
190
601
|
# - The order of records in self will be preserved.
|
191
602
|
#
|
192
|
-
# @param row [Indeger, Float
|
603
|
+
# @param row [Integer, Float]
|
193
604
|
# a row index to remove.
|
194
|
-
# @
|
195
|
-
#
|
196
|
-
#
|
197
|
-
#
|
198
|
-
#
|
199
|
-
#
|
605
|
+
# @return [DataFrame]
|
606
|
+
# remaining records as a DataFrame.
|
607
|
+
# @example Remove a row
|
608
|
+
# penguins.remove(-1)
|
609
|
+
#
|
610
|
+
# # =>
|
611
|
+
# #<RedAmber::DataFrame : 343 x 8 Vectors, 0x0000000000010310>
|
612
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
613
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
614
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
615
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
616
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
617
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
618
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
619
|
+
# : : : : : : ... :
|
620
|
+
# 340 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
621
|
+
# 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
622
|
+
# 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
200
623
|
#
|
201
624
|
# @overload remove(rows)
|
202
|
-
#
|
625
|
+
# Select records and remove them to create a DataFrame of the remaining records.
|
626
|
+
# - Duplicated selection is acceptable.
|
203
627
|
# - The order of records in self will be preserved.
|
204
628
|
#
|
205
|
-
# @param rows [
|
629
|
+
# @param rows [<Integer>, <Float>, Range<Integer>, Vector, Arrow::Array]
|
206
630
|
# row indices to remove.
|
207
|
-
# @
|
208
|
-
#
|
209
|
-
#
|
210
|
-
#
|
631
|
+
# @return [DataFrame]
|
632
|
+
# remaining records as a DataFrame.
|
633
|
+
# @example Remove rows
|
634
|
+
# penguins.remove(100..200)
|
635
|
+
#
|
636
|
+
# # =>
|
637
|
+
# #<RedAmber::DataFrame : 243 x 8 Vectors, 0x0000000000010450>
|
638
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
639
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
640
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
641
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
642
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
643
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
644
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
645
|
+
# : : : : : : ... :
|
646
|
+
# 240 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
647
|
+
# 241 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
648
|
+
# 242 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
649
|
+
#
|
650
|
+
# @overload remove
|
651
|
+
# Select records by indices from block
|
652
|
+
# and remove them to create a DataFrame of the remaining records.
|
653
|
+
# - Duplicated selection is acceptable.
|
654
|
+
# - The order of records in self will be preserved.
|
655
|
+
#
|
656
|
+
# @yieldparam self [DataFrame]
|
657
|
+
# gives self to the block.
|
658
|
+
# The block is evaluated within the context of self.
|
659
|
+
# @yieldreturn [<Integer, Float>, Range<Integer>, Vector, Arrow::Array]
|
211
660
|
# row indices to remove.
|
212
|
-
# @return [DataFrame]
|
661
|
+
# @return [DataFrame]
|
662
|
+
# remaining records as a DataFrame.
|
663
|
+
# @example Remove rows by indices from block
|
664
|
+
# penguins.remove { 0.step(size, 10) }
|
665
|
+
#
|
666
|
+
# # =>
|
667
|
+
# #<RedAmber::DataFrame : 309 x 8 Vectors, 0x00000000000104c8>
|
668
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
669
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
670
|
+
# 0 Adelie Torgersen 39.5 17.4 186 ... 2007
|
671
|
+
# 1 Adelie Torgersen 40.3 18.0 195 ... 2007
|
672
|
+
# 2 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
673
|
+
# 3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
674
|
+
# 4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
675
|
+
# : : : : : : ... :
|
676
|
+
# 306 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
677
|
+
# 307 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
678
|
+
# 308 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
679
|
+
#
|
680
|
+
# @overload remove(booleans)
|
681
|
+
# Select records by filtering with booleans and return a DataFrame.
|
682
|
+
# - The order of records in self will be preserved.
|
683
|
+
#
|
684
|
+
# @param booleans [<Boolean, nil>, Vector, Arrow::Array]
|
685
|
+
# a boolean filter to remove.
|
686
|
+
# @return [DataFrame]
|
687
|
+
# remaining records as a DataFrame.
|
688
|
+
# @example Remove rows by boolean filter
|
689
|
+
# penguins.remove(penguins.bill_length_mm.is_nil)
|
690
|
+
#
|
691
|
+
# # =>
|
692
|
+
# #<RedAmber::DataFrame : 342 x 8 Vectors, 0x0000000000010234>
|
693
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
694
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
695
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
696
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
697
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
698
|
+
# 3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
699
|
+
# 4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
700
|
+
# : : : : : : ... :
|
701
|
+
# 339 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
702
|
+
# 340 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
703
|
+
# 341 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
704
|
+
#
|
705
|
+
# @overload remove
|
706
|
+
# Select records by booleans from block
|
707
|
+
# and remove them to create a DataFrame of the remaining records.
|
708
|
+
# - The order of records in self will be preserved.
|
709
|
+
#
|
710
|
+
# @yieldparam self [DataFrame]
|
711
|
+
# gives self to the block.
|
712
|
+
# The block is evaluated within the context of self.
|
713
|
+
# @yieldreturn [<Boolean, nil>, Vector, Arrow::Array]
|
714
|
+
# a boolean filter to remove. `Vector` or `Arrow::Array` must be boolean type.
|
715
|
+
# @return [DataFrame]
|
716
|
+
# remaining records as a DataFrame.
|
717
|
+
# @example Remove rows by booleans from block
|
718
|
+
# penguins.remove { (species == 'Adelie') | (year == 2009) }
|
719
|
+
#
|
720
|
+
# # =>
|
721
|
+
# #<RedAmber::DataFrame : 124 x 8 Vectors, 0x00000000000102fc>
|
722
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
723
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
724
|
+
# 0 Chinstrap Dream 46.5 17.9 192 ... 2007
|
725
|
+
# 1 Chinstrap Dream 50.0 19.5 196 ... 2007
|
726
|
+
# 2 Chinstrap Dream 51.3 19.2 193 ... 2007
|
727
|
+
# 3 Chinstrap Dream 45.4 18.7 188 ... 2007
|
728
|
+
# 4 Chinstrap Dream 52.7 19.8 197 ... 2007
|
729
|
+
# : : : : : : ... :
|
730
|
+
# 121 Gentoo Biscoe 51.1 16.3 220 ... 2008
|
731
|
+
# 122 Gentoo Biscoe 45.2 13.8 215 ... 2008
|
732
|
+
# 123 Gentoo Biscoe 45.2 16.4 223 ... 2008
|
213
733
|
#
|
214
734
|
def remove(*args, &block)
|
215
735
|
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
@@ -249,57 +769,93 @@ module RedAmber
|
|
249
769
|
end
|
250
770
|
end
|
251
771
|
|
772
|
+
# Remove records (rows) that contain any nil.
|
773
|
+
#
|
774
|
+
# @return [DataFrame]
|
775
|
+
# removed DataFrame.
|
776
|
+
# @example
|
777
|
+
# penguins.remove_nil
|
778
|
+
# # =>
|
779
|
+
# #<RedAmber::DataFrame : 333 x 8 Vectors, 0x00000000000039d0>
|
780
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
781
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
782
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
783
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
784
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
785
|
+
# 3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
786
|
+
# 4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
787
|
+
# : : : : : : ... :
|
788
|
+
# 330 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
789
|
+
# 331 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
790
|
+
# 332 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
791
|
+
#
|
252
792
|
def remove_nil
|
253
793
|
func = Arrow::Function.find(:drop_null)
|
254
794
|
DataFrame.create(func.execute([table]).value)
|
255
795
|
end
|
256
796
|
alias_method :drop_nil, :remove_nil
|
257
797
|
|
798
|
+
# Select records from the top.
|
799
|
+
#
|
800
|
+
# @param n_obs [Integer]
|
801
|
+
# number of records to select.
|
802
|
+
# @return [DataFrame]
|
803
|
+
#
|
258
804
|
def head(n_obs = 5)
|
259
805
|
raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?
|
260
806
|
|
261
807
|
self[0...[n_obs, size].min]
|
262
808
|
end
|
263
809
|
|
810
|
+
# Select records from the end.
|
811
|
+
#
|
812
|
+
# @param n_obs [Integer]
|
813
|
+
# number of records to select.
|
814
|
+
# @return [DataFrame]
|
815
|
+
#
|
264
816
|
def tail(n_obs = 5)
|
265
817
|
raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?
|
266
818
|
|
267
819
|
self[-[n_obs, size].min..]
|
268
820
|
end
|
269
821
|
|
822
|
+
# Select records from the top.
|
823
|
+
#
|
824
|
+
# @param n_obs [Integer]
|
825
|
+
# number of records to select.
|
826
|
+
# @return [DataFrame]
|
827
|
+
#
|
270
828
|
def first(n_obs = 1)
|
271
829
|
head(n_obs)
|
272
830
|
end
|
273
831
|
|
832
|
+
# Select records from the end.
|
833
|
+
#
|
834
|
+
# @param n_obs [Integer]
|
835
|
+
# number of records to select.
|
836
|
+
# @return [DataFrame]
|
837
|
+
#
|
274
838
|
def last(n_obs = 1)
|
275
839
|
tail(n_obs)
|
276
840
|
end
|
277
841
|
|
842
|
+
# Select records by index Array to create a DataFrame.
|
843
|
+
#
|
844
|
+
# - TODO: support for option `boundscheck: true`
|
845
|
+
# - Supports indices in an Arrow::UInt8, UInt16, UInt32, UInt64 or an Array
|
846
|
+
# - Negative index is not supported.
|
847
|
+
# @param index_array [<Integer>, Arrow::Array]
|
848
|
+
# row indices to select.
|
849
|
+
# @return [DataFrame]
|
850
|
+
# selected records as a DataFrame.
|
851
|
+
#
|
278
852
|
# @api private
|
279
|
-
#
|
280
|
-
# Supports indices in an Arrow::UInt{8, 16, 32, 64} or an Array
|
281
|
-
# Negative index is not supported.
|
853
|
+
#
|
282
854
|
def take(index_array)
|
283
855
|
DataFrame.create(@table.take(index_array))
|
284
856
|
end
|
285
857
|
|
286
|
-
#
|
287
|
-
# TODO: support for option `null_selection_behavior: :drop``
|
288
|
-
def filter(*booleans)
|
289
|
-
booleans.flatten!
|
290
|
-
case booleans
|
291
|
-
in []
|
292
|
-
return remove_all_values
|
293
|
-
in [Arrow::BooleanArray => b]
|
294
|
-
filter_by_array(b)
|
295
|
-
else
|
296
|
-
unless booleans.booleans?
|
297
|
-
raise DataFrameArgumentError, 'Argument is not a boolean.'
|
298
|
-
end
|
299
|
-
|
300
|
-
filter_by_array(Arrow::BooleanArray.new(booleans))
|
301
|
-
end
|
302
|
-
end
|
858
|
+
# rubocop:enable Layout/LineLength
|
303
859
|
|
304
860
|
private
|
305
861
|
|