red_amber 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
@@ -1,40 +1,133 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-in for the class DataFrame
|
5
5
|
module DataFrameSelectable
|
6
6
|
# Array, Arrow::Array and Arrow::ChunkedArray are refined
|
7
7
|
using RefineArray
|
8
8
|
using RefineArrayLike
|
9
9
|
|
10
|
+
# rubocop:disable Layout/LineLength
|
11
|
+
|
10
12
|
# Select variables or records.
|
11
13
|
#
|
12
14
|
# @overload [](key)
|
13
|
-
#
|
15
|
+
# Select single variable (column) and return as a Vector.
|
14
16
|
#
|
15
|
-
# @param key [Symbol, String]
|
16
|
-
#
|
17
|
+
# @param key [Symbol, String]
|
18
|
+
# key name to select.
|
19
|
+
# @return [Vector]
|
20
|
+
# selected variable as a Vector.
|
17
21
|
# @note DataFrame.v(key) is faster to create Vector from a variable.
|
22
|
+
# @example Select a column and return Vector
|
23
|
+
# penguins
|
24
|
+
#
|
25
|
+
# # =>
|
26
|
+
# #<RedAmber::DataFrame : 344 x 8 Vectors, 0x00000000000039bc>
|
27
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
28
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
29
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
30
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
31
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
32
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
33
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
34
|
+
# : : : : : : ... :
|
35
|
+
# 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
36
|
+
# 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
37
|
+
# 343 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
38
|
+
#
|
39
|
+
# penguins[:bill_length_mm]
|
40
|
+
#
|
41
|
+
# # =>
|
42
|
+
# #<RedAmber::Vector(:double, size=344):0x00000000000104dc>
|
43
|
+
# [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
|
18
44
|
#
|
19
45
|
# @overload [](keys)
|
20
|
-
#
|
46
|
+
# Select variables and return a DataFrame.
|
21
47
|
#
|
22
48
|
# @param keys [<Symbol, String>] key names to select.
|
23
|
-
# @return [DataFrame]
|
49
|
+
# @return [DataFrame]
|
50
|
+
# selected variables as a DataFrame.
|
51
|
+
# @example Select columns
|
52
|
+
# penguins[:island, :bill_length_mm]
|
53
|
+
#
|
54
|
+
# # =>
|
55
|
+
# #<RedAmber::DataFrame : 344 x 2 Vectors, 0x00000000000104f0>
|
56
|
+
# island bill_length_mm
|
57
|
+
# <string> <double>
|
58
|
+
# 0 Torgersen 39.1
|
59
|
+
# 1 Torgersen 39.5
|
60
|
+
# 2 Torgersen 40.3
|
61
|
+
# 3 Torgersen (nil)
|
62
|
+
# 4 Torgersen 36.7
|
63
|
+
# : : :
|
64
|
+
# 341 Biscoe 50.4
|
65
|
+
# 342 Biscoe 45.2
|
66
|
+
# 343 Biscoe 49.9
|
24
67
|
#
|
25
68
|
# @overload [](index)
|
26
|
-
#
|
69
|
+
# Select a record and return a DataFrame.
|
27
70
|
#
|
28
71
|
# @param index [Integer, Float, Range<Integer>, Vector, Arrow::Array]
|
29
72
|
# index of a row to select.
|
30
|
-
# @return [DataFrame]
|
73
|
+
# @return [DataFrame]
|
74
|
+
# selected record as a DataFrame.
|
75
|
+
# @example Select a row
|
76
|
+
# penguins[0]
|
77
|
+
#
|
78
|
+
# # =>
|
79
|
+
# #<RedAmber::DataFrame : 1 x 8 Vectors, 0x0000000000010504>
|
80
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
81
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
82
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
31
83
|
#
|
32
84
|
# @overload [](indices)
|
33
|
-
#
|
85
|
+
# Select records by indices and return a DataFrame.
|
34
86
|
#
|
35
|
-
# @param indices [<Indeger
|
87
|
+
# @param indices [<Integer>, <Float>, Range<Integer>, Vector, Arrow::Array]
|
36
88
|
# indices of rows to select.
|
37
|
-
# @return [DataFrame]
|
89
|
+
# @return [DataFrame]
|
90
|
+
# selected records as a DataFrame.
|
91
|
+
# @example Select rows by indices
|
92
|
+
# penguins[0..100]
|
93
|
+
#
|
94
|
+
# # =>
|
95
|
+
# #<RedAmber::DataFrame : 101 x 8 Vectors, 0x00000000000105e0>
|
96
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
97
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
98
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
99
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
100
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
101
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
102
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
103
|
+
# : : : : : : ... :
|
104
|
+
# 98 Adelie Dream 33.1 16.1 178 ... 2008
|
105
|
+
# 99 Adelie Dream 43.2 18.5 192 ... 2008
|
106
|
+
# 100 Adelie Biscoe 35.0 17.9 192 ... 2009
|
107
|
+
#
|
108
|
+
# @overload [](booleans)
|
109
|
+
# Select records by booleans and return a DataFrame.
|
110
|
+
#
|
111
|
+
# @param booleans [Array<true, false, nil>, Vector, Arrow::Array]
|
112
|
+
# booleans of rows to select.
|
113
|
+
# @return [DataFrame]
|
114
|
+
# selected records as a DataFrame.
|
115
|
+
# @example Select rows by booleans
|
116
|
+
# penguins[penguins.species == 'Adelie']
|
117
|
+
#
|
118
|
+
# # =>
|
119
|
+
# #<RedAmber::DataFrame : 152 x 8 Vectors, 0x0000000000010658>
|
120
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
121
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
122
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
123
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
124
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
125
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
126
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
127
|
+
# : : : : : : ... :
|
128
|
+
# 149 Adelie Dream 37.8 18.1 193 ... 2009
|
129
|
+
# 150 Adelie Dream 36.0 17.1 187 ... 2009
|
130
|
+
# 151 Adelie Dream 41.5 18.5 201 ... 2009
|
38
131
|
#
|
39
132
|
def [](*args)
|
40
133
|
raise DataFrameArgumentError, 'self is an empty dataframe' if empty?
|
@@ -52,10 +145,10 @@ module RedAmber
|
|
52
145
|
arrow_array = aa
|
53
146
|
else
|
54
147
|
a = parse_args(args, size)
|
55
|
-
return select_variables_by_keys(a) if a.
|
56
|
-
return take(normalize_indices(Arrow::Array.new(a))) if a.
|
148
|
+
return select_variables_by_keys(a) if a.symbol?
|
149
|
+
return take(normalize_indices(Arrow::Array.new(a))) if a.integer?
|
57
150
|
return remove_all_values if a.compact.empty?
|
58
|
-
return filter_by_array(Arrow::BooleanArray.new(a)) if a.
|
151
|
+
return filter_by_array(Arrow::BooleanArray.new(a)) if a.boolean?
|
59
152
|
|
60
153
|
raise DataFrameArgumentError, "invalid arguments: #{args}"
|
61
154
|
end
|
@@ -64,17 +157,27 @@ module RedAmber
|
|
64
157
|
return filter_by_array(arrow_array) if arrow_array.boolean?
|
65
158
|
|
66
159
|
a = arrow_array.to_a
|
67
|
-
return select_variables_by_keys(a) if a.
|
160
|
+
return select_variables_by_keys(a) if a.symbol_or_string?
|
68
161
|
|
69
162
|
raise DataFrameArgumentError, "invalid arguments: #{args}"
|
70
163
|
end
|
71
164
|
|
72
|
-
# Select a variable by
|
165
|
+
# Select a variable by String or Symbol and return as a Vector.
|
166
|
+
#
|
167
|
+
# @param key [Symbol, String]
|
168
|
+
# key name to select.
|
169
|
+
# @return [Vector]
|
170
|
+
# selected variable as a Vector.
|
171
|
+
# @note #v(key) is faster than #[](key).
|
172
|
+
# @example Select a column and return Vector
|
173
|
+
# penguins.v(:bill_length_mm)
|
174
|
+
#
|
175
|
+
# # =>
|
176
|
+
# #<RedAmber::Vector(:double, size=344):0x000000000000f140>
|
177
|
+
# [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
|
178
|
+
#
|
73
179
|
def v(key)
|
74
|
-
|
75
|
-
raise DataFrameArgumentError, "Key is not a Symbol or a String: [#{key}]"
|
76
|
-
end
|
77
|
-
raise DataFrameArgumentError, "Key does not exist: [#{key}]" unless key? key
|
180
|
+
raise DataFrameArgumentError, "Key does not exist: [#{key}]" unless key?(key)
|
78
181
|
|
79
182
|
variables[key.to_sym]
|
80
183
|
end
|
@@ -82,30 +185,168 @@ module RedAmber
|
|
82
185
|
# Select records to create a DataFrame.
|
83
186
|
#
|
84
187
|
# @overload slice(row)
|
85
|
-
#
|
188
|
+
# Select a record and return a DataFrame.
|
86
189
|
#
|
87
|
-
# @param row [Indeger, Float
|
190
|
+
# @param row [Integer, Float]
|
88
191
|
# a row index to select.
|
89
|
-
# @
|
90
|
-
#
|
91
|
-
#
|
92
|
-
#
|
93
|
-
#
|
94
|
-
#
|
192
|
+
# @return [DataFrame]
|
193
|
+
# selected records as a DataFrame.
|
194
|
+
# @example Select a row
|
195
|
+
# penguins
|
196
|
+
#
|
197
|
+
# # =>
|
198
|
+
# #<RedAmber::DataFrame : 344 x 8 Vectors, 0x00000000000039bc>
|
199
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
200
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
201
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
202
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
203
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
204
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
205
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
206
|
+
# : : : : : : ... :
|
207
|
+
# 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
208
|
+
# 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
209
|
+
# 343 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
210
|
+
# penguins.slice(2)
|
211
|
+
#
|
212
|
+
# # =>
|
213
|
+
# #<RedAmber::DataFrame : 1 x 8 Vectors, 0x00000000000039d0>
|
214
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
215
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
216
|
+
# 0 Adelie Torgersen 40.3 18.0 195 ... 2007
|
95
217
|
#
|
96
218
|
# @overload slice(rows)
|
97
|
-
#
|
219
|
+
# Select records and return a DataFrame.
|
98
220
|
# - Duplicated selection is acceptable. The same record will be returned.
|
99
221
|
# - The order of records will be the same as specified indices.
|
100
222
|
#
|
101
|
-
# @param rows [Integer
|
223
|
+
# @param rows [<Integer>, <Float>, Range<Integer>, Vector, Arrow::Array]
|
102
224
|
# row indices to select.
|
103
|
-
# @
|
104
|
-
#
|
105
|
-
#
|
106
|
-
#
|
225
|
+
# @return [DataFrame]
|
226
|
+
# selected records as a DataFrame.
|
227
|
+
# @example Select rows
|
228
|
+
# penguins.slice(300..-1)
|
229
|
+
#
|
230
|
+
# # =>
|
231
|
+
# #<RedAmber::DataFrame : 44 x 8 Vectors, 0x000000000000fb54>
|
232
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
233
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
234
|
+
# 0 Gentoo Biscoe 49.1 14.5 212 ... 2009
|
235
|
+
# 1 Gentoo Biscoe 52.5 15.6 221 ... 2009
|
236
|
+
# 2 Gentoo Biscoe 47.4 14.6 212 ... 2009
|
237
|
+
# 3 Gentoo Biscoe 50.0 15.9 224 ... 2009
|
238
|
+
# 4 Gentoo Biscoe 44.9 13.8 212 ... 2009
|
239
|
+
# : : : : : : ... :
|
240
|
+
# 41 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
241
|
+
# 42 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
242
|
+
# 43 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
243
|
+
#
|
244
|
+
# @overload slice(enumerator)
|
245
|
+
# Select records and return a DataFrame.
|
246
|
+
# - Duplicated selection is acceptable. The same record will be returned.
|
247
|
+
# - The order of records will be the same as specified indices.
|
248
|
+
#
|
249
|
+
# @param enumerator [Enumerator]
|
250
|
+
# an enumerator which returns row indices to select.
|
251
|
+
# @return [DataFrame]
|
252
|
+
# selected records as a DataFrame.
|
253
|
+
# @example Select rows by Enumerator.
|
254
|
+
# penguins.assign_left(index: penguins.indices) # 0.2.0 feature
|
255
|
+
# .slice(0.step(by: 10, to: 340))
|
256
|
+
#
|
257
|
+
# # =>
|
258
|
+
# #<RedAmber::DataFrame : 35 x 9 Vectors, 0x000000000000f2e4>
|
259
|
+
# index species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
260
|
+
# <uint16> <string> <string> <double> <double> <uint8> ... <uint16>
|
261
|
+
# 0 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
262
|
+
# 1 10 Adelie Torgersen 37.8 17.1 186 ... 2007
|
263
|
+
# 2 20 Adelie Biscoe 37.8 18.3 174 ... 2007
|
264
|
+
# 3 30 Adelie Dream 39.5 16.7 178 ... 2007
|
265
|
+
# 4 40 Adelie Dream 36.5 18.0 182 ... 2007
|
266
|
+
# : : : : : : : ... :
|
267
|
+
# 32 320 Gentoo Biscoe 48.5 15.0 219 ... 2009
|
268
|
+
# 33 330 Gentoo Biscoe 50.5 15.2 216 ... 2009
|
269
|
+
# 34 340 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
270
|
+
#
|
271
|
+
# @overload slice
|
272
|
+
# Select records by indices with block and return a DataFrame.
|
273
|
+
# - Duplicated selection is acceptable. The same record will be returned.
|
274
|
+
# - The order of records will be the same as specified indices.
|
275
|
+
#
|
276
|
+
# @yieldparam self [DataFrame]
|
277
|
+
# gives self to the block.
|
278
|
+
# The block is evaluated within the context of self.
|
279
|
+
# @yieldreturn [<Integer>, <Float>, Range<Integer>, Vector, Arrow::Array, Enumerator]
|
107
280
|
# row indices to select.
|
108
|
-
# @return [DataFrame]
|
281
|
+
# @return [DataFrame]
|
282
|
+
# selected records as a DataFrame.
|
283
|
+
# @example Select rows by block
|
284
|
+
# penguins.assign_left(index: penguins.indices) # 0.2.0 feature
|
285
|
+
# .slice { 0.step(by: 100, to: 300).map { |i| i..(i+1) } }
|
286
|
+
#
|
287
|
+
# # =>
|
288
|
+
# #<RedAmber::DataFrame : 8 x 9 Vectors, 0x000000000000f3ac>
|
289
|
+
# index species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
290
|
+
# <uint16> <string> <string> <double> <double> <uint8> ... <uint16>
|
291
|
+
# 0 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
292
|
+
# 1 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
293
|
+
# 2 100 Adelie Biscoe 35.0 17.9 192 ... 2009
|
294
|
+
# 3 101 Adelie Biscoe 41.0 20.0 203 ... 2009
|
295
|
+
# 4 200 Chinstrap Dream 51.5 18.7 187 ... 2009
|
296
|
+
# 5 201 Chinstrap Dream 49.8 17.3 198 ... 2009
|
297
|
+
# 6 300 Gentoo Biscoe 49.1 14.5 212 ... 2009
|
298
|
+
# 7 301 Gentoo Biscoe 52.5 15.6 221 ... 2009
|
299
|
+
#
|
300
|
+
# @overload slice(booleans)
|
301
|
+
# Select records by filtering with booleans and return a DataFrame.
|
302
|
+
#
|
303
|
+
# @param booleans [<Boolean, nil>, Vector, Arrow::Array]
|
304
|
+
# a boolean filter.
|
305
|
+
# @return [DataFrame]
|
306
|
+
# filtered records as a DataFrame.
|
307
|
+
# @example Select rows by boolean filter
|
308
|
+
# penguins.slice(penguins[:bill_length_mm] > 50)
|
309
|
+
#
|
310
|
+
# # =>
|
311
|
+
# #<RedAmber::DataFrame : 52 x 8 Vectors, 0x000000000000fd98>
|
312
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
313
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
314
|
+
# 0 Chinstrap Dream 51.3 19.2 193 ... 2007
|
315
|
+
# 1 Chinstrap Dream 52.7 19.8 197 ... 2007
|
316
|
+
# 2 Chinstrap Dream 51.3 18.2 197 ... 2007
|
317
|
+
# 3 Chinstrap Dream 51.3 19.9 198 ... 2007
|
318
|
+
# 4 Chinstrap Dream 51.7 20.3 194 ... 2007
|
319
|
+
# : : : : : : ... :
|
320
|
+
# 49 Gentoo Biscoe 51.5 16.3 230 ... 2009
|
321
|
+
# 50 Gentoo Biscoe 55.1 16.0 230 ... 2009
|
322
|
+
# 51 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
323
|
+
#
|
324
|
+
# @overload slice
|
325
|
+
# Select records by filtering with block and return a DataFrame.
|
326
|
+
#
|
327
|
+
# @yieldparam self [DataFrame]
|
328
|
+
# gives self to the block.
|
329
|
+
# The block is evaluated within the context of self.
|
330
|
+
# @yieldreturn [<Boolean, nil>, Vector, Arrow::Array]
|
331
|
+
# a boolean filter. `Vector` or `Arrow::Array` must be boolean type.
|
332
|
+
# @return [DataFrame]
|
333
|
+
# filtered records as a DataFrame.
|
334
|
+
# @example Select rows by booleans from block
|
335
|
+
# penguins.slice { indices.map(&:even?) }
|
336
|
+
#
|
337
|
+
# # =>
|
338
|
+
# #<RedAmber::DataFrame : 172 x 8 Vectors, 0x000000000000ff78>
|
339
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
340
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
341
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
342
|
+
# 1 Adelie Torgersen 40.3 18.0 195 ... 2007
|
343
|
+
# 2 Adelie Torgersen 36.7 19.3 193 ... 2007
|
344
|
+
# 3 Adelie Torgersen 38.9 17.8 181 ... 2007
|
345
|
+
# 4 Adelie Torgersen 34.1 18.1 193 ... 2007
|
346
|
+
# : : : : : : ... :
|
347
|
+
# 169 Gentoo Biscoe 47.2 13.7 214 ... 2009
|
348
|
+
# 170 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
349
|
+
# 171 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
109
350
|
#
|
110
351
|
def slice(*args, &block)
|
111
352
|
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
@@ -142,6 +383,73 @@ module RedAmber
|
|
142
383
|
end
|
143
384
|
end
|
144
385
|
|
386
|
+
# Select records by a column specified by a key
|
387
|
+
# and corresponding record with a block.
|
388
|
+
#
|
389
|
+
# @overload slice_by(key)
|
390
|
+
# Select records by elements.
|
391
|
+
#
|
392
|
+
# @param key [Symbol, String]
|
393
|
+
# a key to select column.
|
394
|
+
# @param keep_key [true, false]
|
395
|
+
# preserve column specified by key in the result if true.
|
396
|
+
# @yieldparam self [DataFrame]
|
397
|
+
# gives self to the block.
|
398
|
+
# The block is evaluated within the context of self.
|
399
|
+
# @yieldreturn [<elements>]
|
400
|
+
# array of elements to select.
|
401
|
+
# @return [DataFrame]
|
402
|
+
# selected records as a DataFrame.
|
403
|
+
# @example Select records by elements
|
404
|
+
# df
|
405
|
+
#
|
406
|
+
# # =>
|
407
|
+
# #<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000069e60>
|
408
|
+
# index float string
|
409
|
+
# <uint8> <double> <string>
|
410
|
+
# 0 0 0.0 A
|
411
|
+
# 1 1 1.1 B
|
412
|
+
# 2 2 2.2 C
|
413
|
+
# 3 3 NaN D
|
414
|
+
# 4 (nil) (nil) (nil)
|
415
|
+
#
|
416
|
+
# df.slice_by(:string) { ["A", "C"] }
|
417
|
+
#
|
418
|
+
# # =>
|
419
|
+
# #<RedAmber::DataFrame : 2 x 2 Vectors, 0x000000000001b1ac>
|
420
|
+
# index float
|
421
|
+
# <uint8> <double>
|
422
|
+
# 0 0 0.0
|
423
|
+
# 1 2 2.2
|
424
|
+
#
|
425
|
+
# @overload slice_by(key)
|
426
|
+
# Select records by elements range.
|
427
|
+
#
|
428
|
+
# @param key [Symbol, String]
|
429
|
+
# a key to select column.
|
430
|
+
# @param keep_key [true, false]
|
431
|
+
# preserve column specified by key in the result if true.
|
432
|
+
# @yieldparam self [DataFrame]
|
433
|
+
# gives self to the block.
|
434
|
+
# The block is evaluated within the context of self.
|
435
|
+
# @yieldreturn [Range]
|
436
|
+
# specifies position of elements at the start and the end and
|
437
|
+
# select records between them.
|
438
|
+
# @return [DataFrame]
|
439
|
+
# selected records as a DataFrame.
|
440
|
+
# @example Select records by elements range
|
441
|
+
# df.slice_by(:string) { "A".."C" }
|
442
|
+
#
|
443
|
+
# # =>
|
444
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000069668>
|
445
|
+
# index float
|
446
|
+
# <uint8> <double>
|
447
|
+
# 0 0 0.0
|
448
|
+
# 1 1 1.1
|
449
|
+
# 2 2 2.2
|
450
|
+
#
|
451
|
+
# @since 0.2.1
|
452
|
+
#
|
145
453
|
def slice_by(key, keep_key: false, &block)
|
146
454
|
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
147
455
|
raise DataFrameArgumentError, 'No block given' unless block
|
@@ -183,33 +491,242 @@ module RedAmber
|
|
183
491
|
keep_key ? taken : taken.drop(key)
|
184
492
|
end
|
185
493
|
|
494
|
+
# Select records by filtering with booleans to create a DataFrame.
|
495
|
+
#
|
496
|
+
# @overload filter(booleans)
|
497
|
+
# Select records by filtering with booleans and return a DataFrame.
|
498
|
+
#
|
499
|
+
# @param booleans [<Boolean, nil>, Vector, Arrow::Array]
|
500
|
+
# a boolean filter.
|
501
|
+
# @return [DataFrame]
|
502
|
+
# filtered records as a DataFrame.
|
503
|
+
# @example Filter by boolean Vector
|
504
|
+
# penguins
|
505
|
+
#
|
506
|
+
# # =>
|
507
|
+
# #<RedAmber::DataFrame : 344 x 8 Vectors, 0x00000000000039bc>
|
508
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
509
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
510
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
511
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
512
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
513
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
514
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
515
|
+
# : : : : : : ... :
|
516
|
+
# 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
517
|
+
# 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
518
|
+
# 343 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
519
|
+
#
|
520
|
+
# penguins.filter(penguins.bill_length_mm < 50)
|
521
|
+
#
|
522
|
+
# # =>
|
523
|
+
# #<RedAmber::DataFrame : 285 x 8 Vectors, 0x00000000000101a8>
|
524
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
525
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
526
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
527
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
528
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
529
|
+
# 3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
530
|
+
# 4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
531
|
+
# : : : : : : ... :
|
532
|
+
# 282 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
533
|
+
# 283 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
534
|
+
# 284 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
535
|
+
#
|
536
|
+
# @overload filter
|
537
|
+
# Select records by filtering with block and return a DataFrame.
|
538
|
+
#
|
539
|
+
# @yieldparam self [DataFrame]
|
540
|
+
# gives self to the block.
|
541
|
+
# The block is evaluated within the context of self.
|
542
|
+
# @yieldreturn [<Boolean, nil>, Vector, Arrow::Array]
|
543
|
+
# a boolean filter. `Vector` or `Arrow::Array` must be boolean type.
|
544
|
+
# @return [DataFrame]
|
545
|
+
# filtered records as a DataFrame.
|
546
|
+
# @example Filter by boolean Vector
|
547
|
+
# penguins.filter { bill_length_mm < 50 }
|
548
|
+
#
|
549
|
+
# # =>
|
550
|
+
# #<RedAmber::DataFrame : 285 x 8 Vectors, 0x00000000000101bc>
|
551
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
552
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
553
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
554
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
555
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
556
|
+
# 3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
557
|
+
# 4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
558
|
+
# : : : : : : ... :
|
559
|
+
# 282 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
560
|
+
# 283 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
561
|
+
# 284 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
562
|
+
#
|
563
|
+
def filter(*booleans, &block)
|
564
|
+
booleans.flatten!
|
565
|
+
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
566
|
+
|
567
|
+
if block
|
568
|
+
unless booleans.empty?
|
569
|
+
raise DataFrameArgumentError, 'Must not specify both arguments and block.'
|
570
|
+
end
|
571
|
+
|
572
|
+
booleans = [instance_eval(&block)]
|
573
|
+
end
|
574
|
+
|
575
|
+
case booleans
|
576
|
+
in [] | [[]]
|
577
|
+
return remove_all_values
|
578
|
+
in [Vector => v] if v.boolean?
|
579
|
+
filter_by_array(v.data)
|
580
|
+
in [Arrow::ChunkedArray => ca] if ca.boolean?
|
581
|
+
filter_by_array(ca)
|
582
|
+
in [Arrow::BooleanArray => b]
|
583
|
+
filter_by_array(b)
|
584
|
+
else
|
585
|
+
a = Arrow::Array.new(parse_args(booleans, size))
|
586
|
+
unless a.boolean?
|
587
|
+
raise DataFrameArgumentError, "not a boolean filter: #{booleans}"
|
588
|
+
end
|
589
|
+
|
590
|
+
filter_by_array(a)
|
591
|
+
end
|
592
|
+
end
|
593
|
+
|
186
594
|
# Select records and remove them to create a remainder DataFrame.
|
187
595
|
#
|
188
596
|
# @overload remove(row)
|
189
|
-
#
|
597
|
+
# Select a record and remove it to create a remainder DataFrame.
|
190
598
|
# - The order of records in self will be preserved.
|
191
599
|
#
|
192
|
-
# @param row [Indeger, Float
|
600
|
+
# @param row [Integer, Float]
|
193
601
|
# a row index to remove.
|
194
|
-
# @
|
195
|
-
#
|
196
|
-
#
|
197
|
-
#
|
198
|
-
#
|
199
|
-
#
|
602
|
+
# @return [DataFrame]
|
603
|
+
# remaining records as a DataFrame.
|
604
|
+
# @example Remove a row
|
605
|
+
# penguins.remove(-1)
|
606
|
+
#
|
607
|
+
# # =>
|
608
|
+
# #<RedAmber::DataFrame : 343 x 8 Vectors, 0x0000000000010310>
|
609
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
610
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
611
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
612
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
613
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
614
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
615
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
616
|
+
# : : : : : : ... :
|
617
|
+
# 340 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
618
|
+
# 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
619
|
+
# 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
200
620
|
#
|
201
621
|
# @overload remove(rows)
|
202
|
-
#
|
622
|
+
# Select records and remove them to create a remainder DataFrame.
|
623
|
+
# - Duplicated selection is acceptable.
|
203
624
|
# - The order of records in self will be preserved.
|
204
625
|
#
|
205
|
-
# @param rows [
|
626
|
+
# @param rows [<Integer>, <Float>, Range<Integer>, Vector, Arrow::Array]
|
206
627
|
# row indices to remove.
|
207
|
-
# @
|
208
|
-
#
|
209
|
-
#
|
210
|
-
#
|
628
|
+
# @return [DataFrame]
|
629
|
+
# remaining records as a DataFrame.
|
630
|
+
# @example Remove rows
|
631
|
+
# penguins.remove(100..200)
|
632
|
+
#
|
633
|
+
# # =>
|
634
|
+
# #<RedAmber::DataFrame : 243 x 8 Vectors, 0x0000000000010450>
|
635
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
636
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
637
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
638
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
639
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
640
|
+
# 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
641
|
+
# 4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
642
|
+
# : : : : : : ... :
|
643
|
+
# 240 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
644
|
+
# 241 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
645
|
+
# 242 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
646
|
+
#
|
647
|
+
# @overload remove
|
648
|
+
# Select records by indices from block
|
649
|
+
# and remove them to create a remainder DataFrame.
|
650
|
+
# - Duplicated selection is acceptable.
|
651
|
+
# - The order of records in self will be preserved.
|
652
|
+
#
|
653
|
+
# @yieldparam self [DataFrame]
|
654
|
+
# gives self to the block.
|
655
|
+
# The block is evaluated within the context of self.
|
656
|
+
# @yieldreturn [<Integer, Float>, Range<Integer>, Vector, Arrow::Array]
|
211
657
|
# row indices to remove.
|
212
|
-
# @return [DataFrame]
|
658
|
+
# @return [DataFrame]
|
659
|
+
# remaining records as a DataFrame.
|
660
|
+
# @example Remove rows by indices from block
|
661
|
+
# penguins.remove { 0.step(size, 10) }
|
662
|
+
#
|
663
|
+
# # =>
|
664
|
+
# #<RedAmber::DataFrame : 309 x 8 Vectors, 0x00000000000104c8>
|
665
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
666
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
667
|
+
# 0 Adelie Torgersen 39.5 17.4 186 ... 2007
|
668
|
+
# 1 Adelie Torgersen 40.3 18.0 195 ... 2007
|
669
|
+
# 2 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
670
|
+
# 3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
671
|
+
# 4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
672
|
+
# : : : : : : ... :
|
673
|
+
# 306 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
674
|
+
# 307 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
675
|
+
# 308 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
676
|
+
#
|
677
|
+
# @overload remove(booleans)
|
678
|
+
# Select records by filtering with booleans and remove them to create a remainder DataFrame.
|
679
|
+
# - The order of records in self will be preserved.
|
680
|
+
#
|
681
|
+
# @param booleans [<Boolean, nil>, Vector, Arrow::Array]
|
682
|
+
# a boolean filter to remove.
|
683
|
+
# @return [DataFrame]
|
684
|
+
# remaining records as a DataFrame.
|
685
|
+
# @example Remove rows by boolean filter
|
686
|
+
# penguins.remove(penguins.bill_length_mm.is_nil)
|
687
|
+
#
|
688
|
+
# # =>
|
689
|
+
# #<RedAmber::DataFrame : 342 x 8 Vectors, 0x0000000000010234>
|
690
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
691
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
692
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
693
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
694
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
695
|
+
# 3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
696
|
+
# 4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
697
|
+
# : : : : : : ... :
|
698
|
+
# 339 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
699
|
+
# 340 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
700
|
+
# 341 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
701
|
+
#
|
702
|
+
# @overload remove
|
703
|
+
# Select records by booleans from block
|
704
|
+
# and remove them to create a remainder DataFrame.
|
705
|
+
# - The order of records in self will be preserved.
|
706
|
+
#
|
707
|
+
# @yieldparam self [DataFrame]
|
708
|
+
# gives self to the block.
|
709
|
+
# The block is evaluated within the context of self.
|
710
|
+
# @yieldreturn [<Boolean, nil>, Vector, Arrow::Array]
|
711
|
+
# a boolean filter to remove. `Vector` or `Arrow::Array` must be boolean type.
|
712
|
+
# @return [DataFrame]
|
713
|
+
# remaining records as a DataFrame.
|
714
|
+
# @example Remove rows by booleans from block
|
715
|
+
# penguins.remove { (species == 'Adelie') | (year == 2009) }
|
716
|
+
#
|
717
|
+
# # =>
|
718
|
+
# #<RedAmber::DataFrame : 124 x 8 Vectors, 0x00000000000102fc>
|
719
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
720
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
721
|
+
# 0 Chinstrap Dream 46.5 17.9 192 ... 2007
|
722
|
+
# 1 Chinstrap Dream 50.0 19.5 196 ... 2007
|
723
|
+
# 2 Chinstrap Dream 51.3 19.2 193 ... 2007
|
724
|
+
# 3 Chinstrap Dream 45.4 18.7 188 ... 2007
|
725
|
+
# 4 Chinstrap Dream 52.7 19.8 197 ... 2007
|
726
|
+
# : : : : : : ... :
|
727
|
+
# 121 Gentoo Biscoe 51.1 16.3 220 ... 2008
|
728
|
+
# 122 Gentoo Biscoe 45.2 13.8 215 ... 2008
|
729
|
+
# 123 Gentoo Biscoe 45.2 16.4 223 ... 2008
|
213
730
|
#
|
214
731
|
def remove(*args, &block)
|
215
732
|
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
@@ -249,57 +766,93 @@ module RedAmber
|
|
249
766
|
end
|
250
767
|
end
|
251
768
|
|
769
|
+
# Remove records (rows) that contain any nil.
|
770
|
+
#
|
771
|
+
# @return [DataFrame]
|
772
|
+
# removed DataFrame.
|
773
|
+
# @example
|
774
|
+
# penguins.remove_nil
|
775
|
+
# # =>
|
776
|
+
# #<RedAmber::DataFrame : 333 x 8 Vectors, 0x00000000000039d0>
|
777
|
+
# species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
778
|
+
# <string> <string> <double> <double> <uint8> ... <uint16>
|
779
|
+
# 0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
780
|
+
# 1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
781
|
+
# 2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
782
|
+
# 3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
783
|
+
# 4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
784
|
+
# : : : : : : ... :
|
785
|
+
# 330 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
786
|
+
# 331 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
787
|
+
# 332 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
788
|
+
#
|
252
789
|
def remove_nil
|
253
790
|
func = Arrow::Function.find(:drop_null)
|
254
791
|
DataFrame.create(func.execute([table]).value)
|
255
792
|
end
|
256
793
|
alias_method :drop_nil, :remove_nil
|
257
794
|
|
795
|
+
# Select records from the top.
|
796
|
+
#
|
797
|
+
# @param n_obs [Integer]
|
798
|
+
# number of records to select.
|
799
|
+
# @return [DataFrame]
|
800
|
+
#
|
258
801
|
def head(n_obs = 5)
|
259
802
|
raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?
|
260
803
|
|
261
804
|
self[0...[n_obs, size].min]
|
262
805
|
end
|
263
806
|
|
807
|
+
# Select records from the end.
|
808
|
+
#
|
809
|
+
# @param n_obs [Integer]
|
810
|
+
# number of records to select.
|
811
|
+
# @return [DataFrame]
|
812
|
+
#
|
264
813
|
def tail(n_obs = 5)
|
265
814
|
raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?
|
266
815
|
|
267
816
|
self[-[n_obs, size].min..]
|
268
817
|
end
|
269
818
|
|
819
|
+
# Select records from the top.
|
820
|
+
#
|
821
|
+
# @param n_obs [Integer]
|
822
|
+
# number of records to select.
|
823
|
+
# @return [DataFrame]
|
824
|
+
#
|
270
825
|
def first(n_obs = 1)
|
271
826
|
head(n_obs)
|
272
827
|
end
|
273
828
|
|
829
|
+
# Select records from the end.
|
830
|
+
#
|
831
|
+
# @param n_obs [Integer]
|
832
|
+
# number of records to select.
|
833
|
+
# @return [DataFrame]
|
834
|
+
#
|
274
835
|
def last(n_obs = 1)
|
275
836
|
tail(n_obs)
|
276
837
|
end
|
277
838
|
|
839
|
+
# Select records by index Array to create a DataFrame.
|
840
|
+
#
|
841
|
+
# - TODO: support for option `boundscheck: true`
|
842
|
+
# - Supports indices in an Arrow::UInt8, UInt16, UInt32, UInt64 or an Array
|
843
|
+
# - Negative index is not supported.
|
844
|
+
# @param index_array [<Integer>, Arrow::Array]
|
845
|
+
# row indices to select.
|
846
|
+
# @return [DataFrame]
|
847
|
+
# selected records as a DataFrame.
|
848
|
+
#
|
278
849
|
# @api private
|
279
|
-
#
|
280
|
-
# Supports indices in an Arrow::UInt{8, 16, 32, 64} or an Array
|
281
|
-
# Negative index is not supported.
|
850
|
+
#
|
282
851
|
def take(index_array)
|
283
852
|
DataFrame.create(@table.take(index_array))
|
284
853
|
end
|
285
854
|
|
286
|
-
#
|
287
|
-
# TODO: support for option `null_selection_behavior: :drop``
|
288
|
-
def filter(*booleans)
|
289
|
-
booleans.flatten!
|
290
|
-
case booleans
|
291
|
-
in []
|
292
|
-
return remove_all_values
|
293
|
-
in [Arrow::BooleanArray => b]
|
294
|
-
filter_by_array(b)
|
295
|
-
else
|
296
|
-
unless booleans.booleans?
|
297
|
-
raise DataFrameArgumentError, 'Argument is not a boolean.'
|
298
|
-
end
|
299
|
-
|
300
|
-
filter_by_array(Arrow::BooleanArray.new(booleans))
|
301
|
-
end
|
302
|
-
end
|
855
|
+
# rubocop:enable Layout/LineLength
|
303
856
|
|
304
857
|
private
|
305
858
|
|