red_amber 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +24 -5
- data/CHANGELOG.md +98 -13
- data/Gemfile +1 -0
- data/README.md +55 -6
- data/doc/DataFrame.md +23 -9
- data/doc/Vector.md +156 -24
- data/lib/red-amber.rb +27 -0
- data/lib/red_amber/data_frame.rb +39 -7
- data/lib/red_amber/data_frame_displayable.rb +8 -8
- data/lib/red_amber/data_frame_observation_operation.rb +0 -72
- data/lib/red_amber/data_frame_selectable.rb +151 -32
- data/lib/red_amber/data_frame_variable_operation.rb +4 -0
- data/lib/red_amber/helper.rb +61 -0
- data/lib/red_amber/vector.rb +42 -12
- data/lib/red_amber/vector_functions.rb +25 -18
- data/lib/red_amber/vector_selectable.rb +124 -0
- data/lib/red_amber/{vector_compensable.rb → vector_updatable.rb} +52 -16
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -24
- metadata +6 -4
- data/lib/red_amber/data_frame_helper.rb +0 -64
data/doc/Vector.md
CHANGED
@@ -18,6 +18,13 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
18
18
|
|
19
19
|
```ruby
|
20
20
|
vector = RedAmber::Vector.new([1, 2, 3])
|
21
|
+
# or
|
22
|
+
vector = RedAmber::Vector.new(1, 2, 3)
|
23
|
+
# or
|
24
|
+
vector = RedAmber::Vector.new(1..3)
|
25
|
+
# or
|
26
|
+
vector = RedAmber::Vector.new(Arrow::Array.new([1, 2, 3]))
|
27
|
+
|
21
28
|
# =>
|
22
29
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f514>
|
23
30
|
[1, 2, 3]
|
@@ -29,8 +36,24 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
29
36
|
|
30
37
|
### `values`, `to_a`, `entries`
|
31
38
|
|
39
|
+
### `indices`, `indexes`, `indeces`
|
40
|
+
|
41
|
+
Return indices in an Array
|
42
|
+
|
43
|
+
### `to_ary`
|
44
|
+
Vector has `#to_ary`. It implicitly converts a Vector to an Array when required.
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
[1, 2] + Vector.new([3, 4])
|
48
|
+
|
49
|
+
# =>
|
50
|
+
[1, 2, 3, 4]
|
51
|
+
```
|
52
|
+
|
32
53
|
### `size`, `length`, `n_rows`, `nrow`
|
33
54
|
|
55
|
+
### `empty?`
|
56
|
+
|
34
57
|
### `type`
|
35
58
|
|
36
59
|
### `boolean?`, `numeric?`, `string?`, `temporal?`
|
@@ -49,6 +72,10 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
49
72
|
|
50
73
|
- `n_nulls` is an alias of `n_nils`
|
51
74
|
|
75
|
+
### `has_nil?`
|
76
|
+
|
77
|
+
Returns `true` if self has any `nil`. Otherwise returns `false`.
|
78
|
+
|
52
79
|
### `inspect(limit: 80)`
|
53
80
|
|
54
81
|
- `limit` sets size limit to display long array.
|
@@ -60,6 +87,47 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
60
87
|
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ... ]
|
61
88
|
```
|
62
89
|
|
90
|
+
## Selecting Values
|
91
|
+
|
92
|
+
### `take(indices)`, `[](indices)`
|
93
|
+
|
94
|
+
- Acceptable class for indices:
|
95
|
+
- Integer, Float
|
96
|
+
- Vector of integer or float
|
97
|
+
- Arrow::Array of integer or float
|
98
|
+
- Negative indices are also accepted, as with Ruby's built-in Array.
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
array = RedAmber::Vector.new(%w[A B C D E])
|
102
|
+
indices = RedAmber::Vector.new([0.1, -0.5, -5.1])
|
103
|
+
array.take(indices)
|
104
|
+
# or
|
105
|
+
array[indices]
|
106
|
+
|
107
|
+
# =>
|
108
|
+
#<RedAmber::Vector(:string, size=3):0x000000000000f820>
|
109
|
+
["A", "E", "A"]
|
110
|
+
```
|
111
|
+
|
112
|
+
### `filter(booleans)`, `[](booleans)`
|
113
|
+
|
114
|
+
- Acceptable class for booleans:
|
115
|
+
- An array of true, false, or nil
|
116
|
+
- Boolean Vector
|
117
|
+
- Arrow::BooleanArray
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
array = RedAmber::Vector.new(%w[A B C D E])
|
121
|
+
booleans = [true, false, nil, false, true]
|
122
|
+
array.filter(booleans)
|
123
|
+
# or
|
124
|
+
array[booleans]
|
125
|
+
|
126
|
+
# =>
|
127
|
+
#<RedAmber::Vector(:string, size=2):0x000000000000f21c>
|
128
|
+
["A", "E"]
|
129
|
+
```
|
130
|
+
|
63
131
|
## Functions
|
64
132
|
|
65
133
|
### Unary aggregations: `vector.func => scalar`
|
@@ -68,8 +136,8 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
68
136
|
|
69
137
|
| Method |Boolean|Numeric|String|Options|Remarks|
|
70
138
|
| ----------- | --- | --- | --- | --- | --- |
|
71
|
-
| ✓ `all
|
72
|
-
| ✓ `any
|
139
|
+
| ✓ `all?` | ✓ | | | ✓ ScalarAggregate| alias `all` |
|
140
|
+
| ✓ `any?` | ✓ | | | ✓ ScalarAggregate| alias `any` |
|
73
141
|
| ✓ `approximate_median`| |✓| | ✓ ScalarAggregate| alias `median`|
|
74
142
|
| ✓ `count` | ✓ | ✓ | ✓ | ✓ Count | |
|
75
143
|
| ✓ `count_distinct`| ✓ | ✓ | ✓ | ✓ Count |alias `count_uniq`|
|
@@ -203,6 +271,9 @@ boolean.all(opts: {skip_nulls: false}) #=> false
|
|
203
271
|
vector.tally #=> {NaN=>2}
|
204
272
|
vector.value_counts #=> {NaN=>2}
|
205
273
|
```
|
274
|
+
### `index(element)`
|
275
|
+
|
276
|
+
Returns index of specified element.
|
206
277
|
|
207
278
|
### `sort_indexes`, `sort_indices`, `array_sort_indices`
|
208
279
|
|
@@ -218,39 +289,40 @@ boolean.all(opts: {skip_nulls: false}) #=> false
|
|
218
289
|
## Coerce (not implemented)
|
219
290
|
|
220
291
|
## Update vector's value
|
221
|
-
### `
|
292
|
+
### `replace(specifier, replacer)` => vector
|
222
293
|
|
223
|
-
- Accepts Vector, Array, Arrow::Array
|
224
|
-
|
225
|
-
-
|
226
|
-
-
|
227
|
-
|
294
|
+
- Accepts Scalar, Range of Integer, Vector, Array, Arrow::Array as a specifier
|
295
|
+
- Accepts Scalar, Vector, Array and Arrow::Array as a replacer.
|
296
|
+
- With a boolean specifier, the elements at positions marked `true` are replaced.
|
297
|
+
- With an index specifier, the elements at the given indices are replaced.
|
298
|
+
- replacer specifies the new values that are written into the specified positions.
|
299
|
+
- The number of true in booleans must be equal to the length of replacer
|
228
300
|
|
229
301
|
```ruby
|
230
302
|
vector = RedAmber::Vector.new([1, 2, 3])
|
231
303
|
booleans = [true, false, true]
|
232
|
-
|
233
|
-
vector.
|
304
|
+
replacer = [4, 5]
|
305
|
+
vector.replace(booleans, replacer)
|
234
306
|
# =>
|
235
307
|
#<RedAmber::Vector(:uint8, size=3):0x000000000001ee10>
|
236
308
|
[4, 2, 5]
|
237
309
|
```
|
238
310
|
|
239
|
-
- Scalar value in
|
311
|
+
- Scalar value in replacer can be broadcasted.
|
240
312
|
|
241
313
|
```ruby
|
242
|
-
|
243
|
-
vector.
|
314
|
+
replacer = 0
|
315
|
+
vector.replace(booleans, replacer)
|
244
316
|
# =>
|
245
317
|
#<RedAmber::Vector(:uint8, size=3):0x000000000001ee10>
|
246
318
|
[0, 2, 0]
|
247
319
|
```
|
248
320
|
|
249
|
-
- Returned data type is automatically up-casted by
|
321
|
+
- Returned data type is automatically up-casted by replacer.
|
250
322
|
|
251
323
|
```ruby
|
252
|
-
|
253
|
-
vector.
|
324
|
+
replacer = 1.0
|
325
|
+
vector.replace(booleans, replacer)
|
254
326
|
# =>
|
255
327
|
#<RedAmber::Vector(:double, size=3):0x0000000000025d78>
|
256
328
|
[1.0, 2.0, 1.0]
|
@@ -260,29 +332,29 @@ vector.replace_with(booleans, replacement)
|
|
260
332
|
|
261
333
|
```ruby
|
262
334
|
booleans = [true, false, nil]
|
263
|
-
|
264
|
-
vec.
|
335
|
+
replacer = -1
|
336
|
+
vec.replace(booleans, replacer)
|
265
337
|
# =>
|
266
338
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
267
339
|
[-1, 2, nil]
|
268
340
|
```
|
269
341
|
|
270
|
-
-
|
342
|
+
- replacer can have nil in it.
|
271
343
|
|
272
344
|
```ruby
|
273
345
|
booleans = [true, false, true]
|
274
|
-
|
275
|
-
vec.
|
346
|
+
replacer = [nil]
|
347
|
+
vec.replace(booleans, replacer)
|
276
348
|
# =>
|
277
349
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
278
350
|
[nil, 2, nil]
|
279
351
|
```
|
280
352
|
|
281
|
-
- If no
|
353
|
+
- If no replacer is specified, it is the same as specifying nil.
|
282
354
|
|
283
355
|
```ruby
|
284
356
|
booleans = [true, false, true]
|
285
|
-
vec.
|
357
|
+
vec.replace(booleans)
|
286
358
|
# =>
|
287
359
|
#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
|
288
360
|
[nil, 2, nil]
|
@@ -292,12 +364,27 @@ vec.replace_with(booleans)
|
|
292
364
|
|
293
365
|
```ruby
|
294
366
|
vector = RedAmber::Vector.new(['A', 'B', 'NA'])
|
295
|
-
vector.
|
367
|
+
vector.replace(vector == 'NA', nil)
|
296
368
|
# =>
|
297
369
|
#<RedAmber::Vector(:string, size=3):0x000000000000f8ac>
|
298
370
|
["A", "B", nil]
|
299
371
|
```
|
300
372
|
|
373
|
+
- Specifier in indices.
|
374
|
+
|
375
|
+
Specified indices are applied in sorted order, so the positions in indices and in replacer do not necessarily correspond.
|
376
|
+
|
377
|
+
```ruby
|
378
|
+
vector = RedAmber::Vector.new([1, 2, 3])
|
379
|
+
indices = [2, 1]
|
380
|
+
replacer = [4, 5]
|
381
|
+
vector.replace(indices, replacer)
|
382
|
+
# =>
|
383
|
+
#<RedAmber::Vector(:uint8, size=3):0x000000000000f244>
|
384
|
+
[1, 4, 5] # not [1, 5, 4]
|
385
|
+
```
|
386
|
+
|
387
|
+
|
301
388
|
### `fill_nil_forward`, `fill_nil_backward` => vector
|
302
389
|
|
303
390
|
Propagate the last valid observation forward (or backward).
|
@@ -315,3 +402,48 @@ integer.fill_nil_backward
|
|
315
402
|
#<RedAmber::Vector(:uint8, size=5):0x000000000000f974>
|
316
403
|
[0, 1, 3, 3, nil]
|
317
404
|
```
|
405
|
+
|
406
|
+
### `boolean_vector.if_else(true_choice, false_choice)` => vector
|
407
|
+
|
408
|
+
Choose values based on self. Self must be a boolean Vector.
|
409
|
+
|
410
|
+
`true_choice`, `false_choice` must be of the same type scalar / array / Vector.
|
411
|
+
`nil` values in self (the boolean condition) will be propagated to the output.
|
412
|
+
|
413
|
+
This example will normalize negative indices to positive ones.
|
414
|
+
|
415
|
+
```ruby
|
416
|
+
indices = RedAmber::Vector.new([1, -1, 3, -4])
|
417
|
+
array_size = 10
|
418
|
+
normalized_indices = (indices < 0).if_else(indices + array_size, indices)
|
419
|
+
|
420
|
+
# =>
|
421
|
+
#<RedAmber::Vector(:int16, size=4):0x000000000000f85c>
|
422
|
+
[1, 9, 3, 6]
|
423
|
+
```
|
424
|
+
|
425
|
+
### `is_in(values)` => boolean vector
|
426
|
+
|
427
|
+
For each element in self, return true if it is found in given `values`, false otherwise.
|
428
|
+
By default, nulls are matched against the value set. (This will be changed in SetLookupOptions: not implemented.)
|
429
|
+
|
430
|
+
```ruby
|
431
|
+
vector = RedAmber::Vector.new %W[A B C D]
|
432
|
+
values = ['A', 'C', 'X']
|
433
|
+
vector.is_in(values)
|
434
|
+
|
435
|
+
# =>
|
436
|
+
#<RedAmber::Vector(:boolean, size=4):0x000000000000f2a8>
|
437
|
+
[true, false, true, false]
|
438
|
+
```
|
439
|
+
|
440
|
+
`values` are cast to the same data type as the Vector.
|
441
|
+
|
442
|
+
```ruby
|
443
|
+
vector = RedAmber::Vector.new([1, 2, 255])
|
444
|
+
vector.is_in(1, -1)
|
445
|
+
|
446
|
+
# =>
|
447
|
+
#<RedAmber::Vector(:boolean, size=3):0x000000000000f320>
|
448
|
+
[true, false, true]
|
449
|
+
```
|
data/lib/red-amber.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'arrow'
|
4
|
+
require 'rover-df'
|
5
|
+
|
6
|
+
require_relative 'red_amber/helper'
|
7
|
+
require_relative 'red_amber/data_frame_displayable'
|
8
|
+
require_relative 'red_amber/data_frame_indexable'
|
9
|
+
require_relative 'red_amber/data_frame_selectable'
|
10
|
+
require_relative 'red_amber/data_frame_observation_operation'
|
11
|
+
require_relative 'red_amber/data_frame_variable_operation'
|
12
|
+
require_relative 'red_amber/data_frame'
|
13
|
+
require_relative 'red_amber/vector_functions'
|
14
|
+
require_relative 'red_amber/vector_updatable'
|
15
|
+
require_relative 'red_amber/vector_selectable'
|
16
|
+
require_relative 'red_amber/vector'
|
17
|
+
require_relative 'red_amber/version'
|
18
|
+
|
19
|
+
module RedAmber
|
20
|
+
class Error < StandardError; end
|
21
|
+
|
22
|
+
class DataFrameArgumentError < ArgumentError; end
|
23
|
+
class DataFrameTypeError < TypeError; end
|
24
|
+
|
25
|
+
class VectorArgumentError < ArgumentError; end
|
26
|
+
class VectorTypeError < TypeError; end
|
27
|
+
end
|
data/lib/red_amber/data_frame.rb
CHANGED
@@ -6,11 +6,11 @@ module RedAmber
|
|
6
6
|
class DataFrame
|
7
7
|
# mix-in
|
8
8
|
include DataFrameDisplayable
|
9
|
-
include DataFrameHelper
|
10
9
|
include DataFrameIndexable
|
11
10
|
include DataFrameSelectable
|
12
11
|
include DataFrameObservationOperation
|
13
12
|
include DataFrameVariableOperation
|
13
|
+
include Helper
|
14
14
|
|
15
15
|
def initialize(*args)
|
16
16
|
@variables = @keys = @vectors = @types = @data_types = nil
|
@@ -44,7 +44,7 @@ module RedAmber
|
|
44
44
|
attr_reader :table
|
45
45
|
|
46
46
|
def to_arrow
|
47
|
-
table
|
47
|
+
@table
|
48
48
|
end
|
49
49
|
|
50
50
|
def save(output, options = {})
|
@@ -79,12 +79,12 @@ module RedAmber
|
|
79
79
|
alias_method :var_names, :keys
|
80
80
|
|
81
81
|
def key?(key)
|
82
|
-
keys.include?(key.to_sym)
|
82
|
+
@keys.include?(key.to_sym)
|
83
83
|
end
|
84
84
|
alias_method :has_key?, :key?
|
85
85
|
|
86
86
|
def key_index(key)
|
87
|
-
keys.find_index(key.to_sym)
|
87
|
+
@keys.find_index(key.to_sym)
|
88
88
|
end
|
89
89
|
alias_method :find_index, :key_index
|
90
90
|
alias_method :index, :key_index
|
@@ -101,10 +101,10 @@ module RedAmber
|
|
101
101
|
@vectors || @vectors = init_instance_vars(:vectors)
|
102
102
|
end
|
103
103
|
|
104
|
-
def
|
105
|
-
0...size
|
104
|
+
def indices
|
105
|
+
(0...size).to_a
|
106
106
|
end
|
107
|
-
alias_method :
|
107
|
+
alias_method :indexes, :indices
|
108
108
|
|
109
109
|
def to_h
|
110
110
|
variables.transform_values(&:to_a)
|
@@ -133,6 +133,19 @@ module RedAmber
|
|
133
133
|
Rover::DataFrame.new(to_h)
|
134
134
|
end
|
135
135
|
|
136
|
+
def to_iruby
|
137
|
+
require 'iruby'
|
138
|
+
return ['text/plain', '(empty DataFrame)'] if empty?
|
139
|
+
|
140
|
+
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'tdr') == 'table'
|
141
|
+
['text/html', html_table]
|
142
|
+
elsif size <= 5
|
143
|
+
['text/plain', tdr_str(tally: 0)]
|
144
|
+
else
|
145
|
+
['text/plain', tdr_str]
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
136
149
|
private
|
137
150
|
|
138
151
|
# initialize @variables, @keys, @vectors and return one of them
|
@@ -148,5 +161,24 @@ module RedAmber
|
|
148
161
|
@variables, @keys, @vectors = ary
|
149
162
|
ary[%i[variables keys vectors].index(var)]
|
150
163
|
end
|
164
|
+
|
165
|
+
def html_table
|
166
|
+
reduced = size > 8 ? self[0..4, -4..-1] : self
|
167
|
+
|
168
|
+
converted = reduced.assign do
|
169
|
+
vectors.select.with_object({}) do |vector, assigner|
|
170
|
+
if vector.has_nil?
|
171
|
+
assigner[vector.key] = vector.to_a.map do |e|
|
172
|
+
e = e.nil? ? '<i>(nil)</i>' : e.to_s # nil
|
173
|
+
e = '""' if e.empty? # empty string
|
174
|
+
e.sub(/(\s+)/, '"\1"') # blank spaces
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
181
|
+
"#{size} x #{n_keys} vector#{pl(n_keys)} ; #{html}"
|
182
|
+
end
|
151
183
|
end
|
152
184
|
end
|
@@ -14,7 +14,11 @@ module RedAmber
|
|
14
14
|
# def summary() end
|
15
15
|
|
16
16
|
def inspect
|
17
|
-
|
17
|
+
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'tdr') == 'table'
|
18
|
+
"#<#{shape_str(with_id: true)}>\n#{self}"
|
19
|
+
else
|
20
|
+
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
|
21
|
+
end
|
18
22
|
end
|
19
23
|
|
20
24
|
# - limit: max num of Vectors to show
|
@@ -30,10 +34,6 @@ module RedAmber
|
|
30
34
|
|
31
35
|
private # =====
|
32
36
|
|
33
|
-
def pl(num)
|
34
|
-
num > 1 ? 's' : ''
|
35
|
-
end
|
36
|
-
|
37
37
|
def shape_str(with_id: false)
|
38
38
|
shape_info = empty? ? '(empty)' : "#{size} x #{n_keys} Vector#{pl(n_keys)}"
|
39
39
|
id = with_id ? format(', 0x%016x', object_id) : ''
|
@@ -81,12 +81,12 @@ module RedAmber
|
|
81
81
|
end
|
82
82
|
|
83
83
|
def make_header_format(levels, headers, quoted_keys)
|
84
|
-
# find longest word to adjust
|
84
|
+
# find longest word to adjust width
|
85
85
|
w_idx = n_keys.to_s.size
|
86
86
|
w_key = [quoted_keys.map(&:size).max, headers[:key].size].max
|
87
87
|
w_type = [types.map(&:size).max, headers[:type].size].max
|
88
|
-
|
89
|
-
"%-#{w_idx}s %-#{w_key}s %-#{w_type}s %#{
|
88
|
+
w_level = [levels.map { |l| l.to_s.size }.max, headers[:levels].size].max
|
89
|
+
"%-#{w_idx}s %-#{w_key}s %-#{w_type}s %#{w_level}s %s\n"
|
90
90
|
end
|
91
91
|
|
92
92
|
def type_group(data_type)
|
@@ -3,81 +3,9 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# mix-ins for the class DataFrame
|
5
5
|
module DataFrameObservationOperation
|
6
|
-
# slice and select some observations to create sub DataFrame
|
7
|
-
def slice(*args, &block)
|
8
|
-
slicer = args
|
9
|
-
if block
|
10
|
-
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
11
|
-
|
12
|
-
slicer = instance_eval(&block)
|
13
|
-
end
|
14
|
-
slicer = [slicer].flatten
|
15
|
-
return remove_all_values if slicer.empty? || slicer[0].nil?
|
16
|
-
|
17
|
-
# filter with same length
|
18
|
-
booleans = nil
|
19
|
-
if slicer[0].is_a?(Vector) || slicer[0].is_a?(Arrow::BooleanArray)
|
20
|
-
booleans = slicer[0].to_a
|
21
|
-
elsif slicer.size == size && booleans?(slicer)
|
22
|
-
booleans = slicer
|
23
|
-
end
|
24
|
-
return select_obs_by_boolean(booleans) if booleans
|
25
|
-
|
26
|
-
# filter with indexes
|
27
|
-
slicer = expand_range(slicer)
|
28
|
-
return map_indices(*slicer) if integers?(slicer)
|
29
|
-
|
30
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
31
|
-
end
|
32
|
-
|
33
|
-
# remove selected observations to create sub DataFrame
|
34
|
-
def remove(*args, &block)
|
35
|
-
remover = args
|
36
|
-
if block
|
37
|
-
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
38
|
-
|
39
|
-
remover = instance_eval(&block)
|
40
|
-
end
|
41
|
-
remover = [remover].flatten
|
42
|
-
|
43
|
-
return self if remover.empty?
|
44
|
-
|
45
|
-
# filter with same length
|
46
|
-
booleans = nil
|
47
|
-
if remover[0].is_a?(Vector) || remover[0].is_a?(Arrow::BooleanArray)
|
48
|
-
booleans = remover[0].to_a
|
49
|
-
elsif remover.size == size && booleans?(remover)
|
50
|
-
booleans = remover
|
51
|
-
end
|
52
|
-
if booleans
|
53
|
-
inverted = booleans.map(&:!)
|
54
|
-
return select_obs_by_boolean(inverted)
|
55
|
-
end
|
56
|
-
|
57
|
-
# filter with indexes
|
58
|
-
slicer = indexes.to_a - expand_range(remover)
|
59
|
-
return remove_all_values if slicer.empty?
|
60
|
-
return map_indices(*slicer) if integers?(slicer)
|
61
|
-
|
62
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
63
|
-
end
|
64
|
-
|
65
|
-
def remove_nil
|
66
|
-
func = Arrow::Function.find(:drop_null)
|
67
|
-
DataFrame.new(func.execute([table]).value)
|
68
|
-
end
|
69
|
-
alias_method :drop_nil, :remove_nil
|
70
|
-
|
71
6
|
def group(aggregating_keys, func, target_keys)
|
72
7
|
t = table.group(*aggregating_keys)
|
73
8
|
RedAmber::DataFrame.new(t.send(func, *target_keys))
|
74
9
|
end
|
75
|
-
|
76
|
-
private
|
77
|
-
|
78
|
-
# return a DataFrame with same keys as self without values
|
79
|
-
def remove_all_values
|
80
|
-
DataFrame.new(keys.each_with_object({}) { |key, h| h[key] = [] })
|
81
|
-
end
|
82
10
|
end
|
83
11
|
end
|