red_amber 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/CHANGELOG.md +125 -0
- data/README.md +86 -269
- data/doc/DataFrame.md +427 -281
- data/doc/Vector.md +35 -54
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +33 -41
- data/lib/red_amber/data_frame_displayable.rb +59 -6
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +12 -10
- data/lib/red_amber/data_frame_selectable.rb +53 -9
- data/lib/red_amber/data_frame_variable_operation.rb +57 -20
- data/lib/red_amber/group.rb +5 -3
- data/lib/red_amber/helper.rb +20 -18
- data/lib/red_amber/vector.rb +50 -31
- data/lib/red_amber/vector_functions.rb +21 -24
- data/lib/red_amber/vector_selectable.rb +18 -9
- data/lib/red_amber/vector_updatable.rb +6 -3
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -0
- metadata +13 -3
- data/doc/examples_of_red_amber.ipynb +0 -6783
@@ -3,8 +3,8 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# mix-in for the class DataFrame
|
5
5
|
module DataFrameSelectable
|
6
|
-
# select
|
7
|
-
# select
|
6
|
+
# select columns: [symbol] or [string]
|
7
|
+
# select rows: [array of index], [range]
|
8
8
|
def [](*args)
|
9
9
|
args.flatten!
|
10
10
|
raise DataFrameArgumentError, 'Empty dataframe' if empty?
|
@@ -22,17 +22,17 @@ module RedAmber
|
|
22
22
|
raise DataFrameArgumentError, "Invalid argument: #{args}"
|
23
23
|
end
|
24
24
|
|
25
|
-
# slice and select
|
25
|
+
# slice and select rows to create sub DataFrame
|
26
26
|
def slice(*args, &block)
|
27
27
|
slicer = args
|
28
28
|
if block
|
29
29
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
30
30
|
|
31
|
-
slicer = instance_eval(&block)
|
31
|
+
slicer = [instance_eval(&block)]
|
32
32
|
end
|
33
|
-
slicer
|
33
|
+
slicer.flatten!
|
34
34
|
|
35
|
-
raise DataFrameArgumentError, '
|
35
|
+
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
36
36
|
return remove_all_values if slicer.empty? || slicer[0].nil?
|
37
37
|
|
38
38
|
vector = parse_to_vector(slicer)
|
@@ -46,15 +46,59 @@ module RedAmber
|
|
46
46
|
raise DataFrameArgumentError, "Invalid argument #{slicer}"
|
47
47
|
end
|
48
48
|
|
49
|
-
|
49
|
+
def slice_by(key, keep_key: false, &block)
|
50
|
+
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
51
|
+
raise DataFrameArgumentError, 'No block given' unless block
|
52
|
+
raise DataFrameArgumentError, "#{key} is no a key of self" unless key?(key)
|
53
|
+
return self if key.nil?
|
54
|
+
|
55
|
+
slicer = instance_eval(&block)
|
56
|
+
return DataFrame.new unless slicer
|
57
|
+
|
58
|
+
if slicer.is_a?(Range)
|
59
|
+
from = slicer.begin
|
60
|
+
from =
|
61
|
+
if from.is_a?(String)
|
62
|
+
self[key].index(from)
|
63
|
+
elsif from.nil?
|
64
|
+
0
|
65
|
+
elsif from < 0
|
66
|
+
size + from
|
67
|
+
else
|
68
|
+
from
|
69
|
+
end
|
70
|
+
to = slicer.end
|
71
|
+
to =
|
72
|
+
if to.is_a?(String)
|
73
|
+
self[key].index(to)
|
74
|
+
elsif to.nil?
|
75
|
+
size - 1
|
76
|
+
elsif to < 0
|
77
|
+
size + to
|
78
|
+
else
|
79
|
+
to
|
80
|
+
end
|
81
|
+
slicer = (from..to).to_a
|
82
|
+
else
|
83
|
+
slicer = slicer.map { |x| x.is_a?(String) ? self[key].index(x) : x }
|
84
|
+
end
|
85
|
+
|
86
|
+
if keep_key
|
87
|
+
take(slicer)
|
88
|
+
else
|
89
|
+
take(slicer).drop(key)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# remove selected rows to create remainder DataFrame
|
50
94
|
def remove(*args, &block)
|
51
95
|
remover = args
|
52
96
|
if block
|
53
97
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
54
98
|
|
55
|
-
remover = instance_eval(&block)
|
99
|
+
remover = [instance_eval(&block)]
|
56
100
|
end
|
57
|
-
remover
|
101
|
+
remover.flatten!
|
58
102
|
|
59
103
|
raise DataFrameArgumentError, 'Empty dataframe' if empty?
|
60
104
|
return self if remover.empty? || remover[0].nil?
|
@@ -9,18 +9,28 @@ module RedAmber
|
|
9
9
|
if block
|
10
10
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
11
11
|
|
12
|
-
picker = instance_eval(&block)
|
12
|
+
picker = [instance_eval(&block)]
|
13
13
|
end
|
14
|
-
picker
|
14
|
+
picker.flatten!
|
15
15
|
return DataFrame.new if picker.empty? || picker == [nil]
|
16
16
|
|
17
|
-
|
17
|
+
key_vector = Vector.new(keys)
|
18
|
+
vec = parse_to_vector(picker, vsize: n_keys)
|
19
|
+
|
20
|
+
ary =
|
21
|
+
if vec.boolean?
|
22
|
+
key_vector.filter(*vec).to_a
|
23
|
+
elsif vec.numeric?
|
24
|
+
key_vector.take(*vec).to_a
|
25
|
+
elsif vec.string? || vec.dictionary?
|
26
|
+
picker
|
27
|
+
else
|
28
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
29
|
+
end
|
18
30
|
|
19
31
|
# DataFrame#[] creates a Vector when a single key is specified.
|
20
32
|
# DataFrame#pick creates a DataFrame with single key.
|
21
|
-
|
22
|
-
|
23
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
33
|
+
DataFrame.new(@table[ary])
|
24
34
|
end
|
25
35
|
|
26
36
|
# drop some variables to create remainder sub DataFrame
|
@@ -29,19 +39,29 @@ module RedAmber
|
|
29
39
|
if block
|
30
40
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
31
41
|
|
32
|
-
dropper = instance_eval(&block)
|
42
|
+
dropper = [instance_eval(&block)]
|
33
43
|
end
|
34
|
-
dropper
|
35
|
-
|
44
|
+
dropper.flatten!
|
45
|
+
|
46
|
+
key_vector = Vector.new(keys)
|
47
|
+
vec = parse_to_vector(dropper, vsize: n_keys)
|
48
|
+
|
49
|
+
ary =
|
50
|
+
if vec.boolean?
|
51
|
+
key_vector.filter(*vec.primitive_invert).each.map(&:to_sym) # Array
|
52
|
+
elsif vec.numeric?
|
53
|
+
keys - key_vector.take(*vec).each.map(&:to_sym) # Array
|
54
|
+
elsif vec.string? || vec.dictionary?
|
55
|
+
keys - dropper
|
56
|
+
else
|
57
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
58
|
+
end
|
36
59
|
|
37
|
-
|
38
|
-
return DataFrame.new if picker.empty?
|
60
|
+
return DataFrame.new if ary.empty?
|
39
61
|
|
40
62
|
# DataFrame#[] creates a Vector when a single key is specified.
|
41
63
|
# DataFrame#drop creates a DataFrame with single key.
|
42
|
-
|
43
|
-
|
44
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
64
|
+
DataFrame.new(@table[ary])
|
45
65
|
end
|
46
66
|
|
47
67
|
# rename variables to create a new DataFrame
|
@@ -91,10 +111,20 @@ module RedAmber
|
|
91
111
|
|
92
112
|
def assign_update(*assigner, &block)
|
93
113
|
if block
|
94
|
-
|
95
|
-
|
96
|
-
|
114
|
+
assigner_from_block = instance_eval(&block)
|
115
|
+
assigner =
|
116
|
+
if assigner.empty?
|
117
|
+
# block only
|
118
|
+
[assigner_from_block]
|
119
|
+
# If Ruby >= 3.0, one line pattern match can be used
|
120
|
+
# assigner_from_block in [Array, *]
|
121
|
+
elsif multiple_assigner?(assigner_from_block)
|
122
|
+
assigner.zip(assigner_from_block)
|
123
|
+
else
|
124
|
+
assigner.zip([assigner_from_block])
|
125
|
+
end
|
97
126
|
end
|
127
|
+
|
98
128
|
case assigner
|
99
129
|
in [] | [nil] | [{}] | [[]]
|
100
130
|
return self
|
@@ -113,6 +143,8 @@ module RedAmber
|
|
113
143
|
updater = {}
|
114
144
|
appender = {}
|
115
145
|
key_array_pairs.each do |key, array|
|
146
|
+
raise DataFrameArgumentError, "Empty column data: #{key} => nil" if array.nil?
|
147
|
+
|
116
148
|
if keys.include? key
|
117
149
|
updater[key] = array
|
118
150
|
else
|
@@ -153,7 +185,7 @@ module RedAmber
|
|
153
185
|
data = updater[key]
|
154
186
|
next unless data
|
155
187
|
|
156
|
-
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
|
188
|
+
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.nil? || data.size != size
|
157
189
|
|
158
190
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
159
191
|
fields[i] = Arrow::Field.new(key, a.value_data_type)
|
@@ -179,8 +211,13 @@ module RedAmber
|
|
179
211
|
end
|
180
212
|
end
|
181
213
|
|
182
|
-
def
|
183
|
-
|
214
|
+
def multiple_assigner?(assigner)
|
215
|
+
case assigner
|
216
|
+
in [Vector, *] | [Array, *] | [Arrow::Array, *]
|
217
|
+
true
|
218
|
+
else
|
219
|
+
false
|
220
|
+
end
|
184
221
|
end
|
185
222
|
end
|
186
223
|
end
|
data/lib/red_amber/group.rb
CHANGED
@@ -54,9 +54,11 @@ module RedAmber
|
|
54
54
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
|
55
55
|
|
56
56
|
df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
57
|
-
df = df
|
58
|
-
# if counts are the same (
|
59
|
-
|
57
|
+
df = df.pick(@group_keys, df.keys - @group_keys)
|
58
|
+
# if counts are the same (and do not include NaN or nil), aggregate count columns.
|
59
|
+
if func == :count && df.pick(@group_keys.size..).to_h.values.uniq.size == 1
|
60
|
+
df = df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
|
61
|
+
end
|
60
62
|
df
|
61
63
|
end
|
62
64
|
end
|
data/lib/red_amber/helper.rb
CHANGED
@@ -17,10 +17,6 @@ module RedAmber
|
|
17
17
|
enum.all?(Integer)
|
18
18
|
end
|
19
19
|
|
20
|
-
def sym_or_str?(enum)
|
21
|
-
enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
|
22
|
-
end
|
23
|
-
|
24
20
|
def booleans?(enum)
|
25
21
|
enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
|
26
22
|
end
|
@@ -29,32 +25,38 @@ module RedAmber
|
|
29
25
|
DataFrame.new(key => vector.data)
|
30
26
|
end
|
31
27
|
|
32
|
-
def parse_to_vector(args)
|
28
|
+
def parse_to_vector(args, vsize: size)
|
33
29
|
a = args.reduce([]) do |accum, elem|
|
34
|
-
accum.concat(normalize_element(elem))
|
30
|
+
accum.concat(normalize_element(elem, vsize: vsize))
|
35
31
|
end
|
36
32
|
Vector.new(a)
|
37
33
|
end
|
38
34
|
|
39
|
-
def normalize_element(elem)
|
35
|
+
def normalize_element(elem, vsize: size)
|
40
36
|
case elem
|
41
|
-
when
|
42
|
-
[
|
37
|
+
when NilClass
|
38
|
+
[nil]
|
43
39
|
when Range
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
40
|
+
bg = elem.begin
|
41
|
+
en = elem.end
|
42
|
+
if [bg, en].any?(Integer)
|
43
|
+
bg += vsize if bg&.negative?
|
44
|
+
en += vsize if en&.negative?
|
45
|
+
en -= 1 if en.is_a?(Integer) && elem.exclude_end?
|
46
|
+
if bg&.negative? || (en && en >= vsize)
|
47
|
+
raise DataFrameArgumentError, "Index out of range: #{elem} for 0..#{vsize - 1}"
|
50
48
|
end
|
51
49
|
|
52
|
-
(0...
|
50
|
+
Array(0...vsize)[elem]
|
51
|
+
elsif bg.nil? && en.nil?
|
52
|
+
Array(0...vsize)
|
53
53
|
else
|
54
|
-
elem
|
54
|
+
Array[elem]
|
55
55
|
end
|
56
|
+
when Enumerator
|
57
|
+
elem.to_a
|
56
58
|
else
|
57
|
-
Array
|
59
|
+
Array[elem]
|
58
60
|
end
|
59
61
|
end
|
60
62
|
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -11,27 +11,28 @@ module RedAmber
|
|
11
11
|
include Helper
|
12
12
|
|
13
13
|
def initialize(*array)
|
14
|
-
@key = nil # default is 'headless'
|
15
|
-
if array.empty? || array
|
14
|
+
@key = nil # default is 'headless' Vector
|
15
|
+
if array.empty? || array.first.nil?
|
16
16
|
Vector.new([])
|
17
17
|
else
|
18
18
|
array.flatten!
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
19
|
+
@data =
|
20
|
+
case array
|
21
|
+
in [Vector => v]
|
22
|
+
v.data
|
23
|
+
in [Arrow::Array => a]
|
24
|
+
a
|
25
|
+
in [Arrow::ChunkedArray => ca]
|
26
|
+
ca
|
27
|
+
in [Range => r]
|
28
|
+
Arrow::Array.new(Array(r))
|
29
|
+
else
|
30
|
+
begin
|
31
|
+
Arrow::Array.new(Array(array))
|
32
|
+
rescue Error
|
33
|
+
raise VectorArgumentError, "Invalid argument: #{array}"
|
34
|
+
end
|
35
|
+
end
|
35
36
|
end
|
36
37
|
end
|
37
38
|
|
@@ -43,19 +44,24 @@ module RedAmber
|
|
43
44
|
end
|
44
45
|
|
45
46
|
def inspect(limit: 80)
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
s
|
53
|
-
|
47
|
+
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
|
48
|
+
# Better performance than `.upcase == 'MINIMUM'`
|
49
|
+
"#{self.class}(:#{type}, size=#{size})"
|
50
|
+
else
|
51
|
+
sio = StringIO.new << '['
|
52
|
+
to_a.each_with_object(sio).with_index do |(e, s), i|
|
53
|
+
next_str = "#{s.size > 1 ? ', ' : ''}#{e.inspect}"
|
54
|
+
if (s.size + next_str.size) < limit
|
55
|
+
s << next_str
|
56
|
+
else
|
57
|
+
s << ', ... ' if i < size
|
58
|
+
break
|
59
|
+
end
|
54
60
|
end
|
55
|
-
|
56
|
-
sio << ']'
|
61
|
+
sio << ']'
|
57
62
|
|
58
|
-
|
63
|
+
format "#<#{self.class}(:#{type}, size=#{size}):0x%016x>\n%s\n", object_id, sio.string
|
64
|
+
end
|
59
65
|
end
|
60
66
|
|
61
67
|
def values
|
@@ -71,7 +77,7 @@ module RedAmber
|
|
71
77
|
alias_method :indeces, :indices
|
72
78
|
|
73
79
|
def to_ary
|
74
|
-
|
80
|
+
values
|
75
81
|
end
|
76
82
|
|
77
83
|
def size
|
@@ -110,6 +116,10 @@ module RedAmber
|
|
110
116
|
type_class == Arrow::StringDataType
|
111
117
|
end
|
112
118
|
|
119
|
+
def dictionary?
|
120
|
+
type_class == Arrow::DictionaryDataType
|
121
|
+
end
|
122
|
+
|
113
123
|
def temporal?
|
114
124
|
type_class < Arrow::TemporalDataType
|
115
125
|
end
|
@@ -122,14 +132,23 @@ module RedAmber
|
|
122
132
|
return enum_for(:each) unless block_given?
|
123
133
|
|
124
134
|
size.times do |i|
|
125
|
-
yield
|
135
|
+
yield data[i]
|
126
136
|
end
|
127
137
|
end
|
128
138
|
|
139
|
+
def map(&block)
|
140
|
+
return enum_for(:map) unless block
|
141
|
+
|
142
|
+
Vector.new(to_a.map(&block))
|
143
|
+
end
|
144
|
+
alias_method :collect, :map
|
145
|
+
|
146
|
+
# undocumented
|
129
147
|
def chunked?
|
130
148
|
@data.is_a? Arrow::ChunkedArray
|
131
149
|
end
|
132
150
|
|
151
|
+
# undocumented
|
133
152
|
def n_chunks
|
134
153
|
chunked? ? @data.n_chunks : 0
|
135
154
|
end
|
@@ -34,13 +34,6 @@ module RedAmber
|
|
34
34
|
end
|
35
35
|
alias_method :std, :sd
|
36
36
|
|
37
|
-
# option(s) required
|
38
|
-
# - index
|
39
|
-
|
40
|
-
# Returns other than value
|
41
|
-
# - mode
|
42
|
-
# - tdigest
|
43
|
-
|
44
37
|
# Return quantile
|
45
38
|
# 0.5 quantile (median) is returned by default.
|
46
39
|
# Or return quantile for specified probability (prob).
|
@@ -88,8 +81,9 @@ module RedAmber
|
|
88
81
|
|
89
82
|
# [Unary element-wise]: vector.func => vector
|
90
83
|
unary_element_wise =
|
91
|
-
%i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward
|
92
|
-
is_inf is_nan is_null is_valid
|
84
|
+
%i[abs acos asin array_sort_indices atan bit_wise_not ceil cos fill_null_backward \
|
85
|
+
fill_null_forward floor is_finite is_inf is_nan is_null is_valid ln log10 log1p log2 \
|
86
|
+
round round_to_multiple sign sin tan trunc unique]
|
93
87
|
unary_element_wise.each do |function|
|
94
88
|
define_method(function) do |**options|
|
95
89
|
datum = exec_func_unary(function, options)
|
@@ -129,16 +123,9 @@ module RedAmber
|
|
129
123
|
end
|
130
124
|
alias_method :not, :invert
|
131
125
|
|
132
|
-
# NaN support needed
|
133
|
-
# - acos asin ln log10 log1p log2
|
134
|
-
|
135
|
-
# Functions with numerical range check
|
136
|
-
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
137
|
-
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
138
|
-
|
139
126
|
# [Binary element-wise]: vector.func(other) => vector
|
140
127
|
binary_element_wise =
|
141
|
-
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
|
128
|
+
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
|
142
129
|
binary_element_wise.each do |function|
|
143
130
|
define_method(function) do |other, **options|
|
144
131
|
datum = exec_func_binary(function, other, options)
|
@@ -162,13 +149,6 @@ module RedAmber
|
|
162
149
|
end
|
163
150
|
end
|
164
151
|
|
165
|
-
# NaN support needed
|
166
|
-
# - logb
|
167
|
-
|
168
|
-
# Functions with numerical range check
|
169
|
-
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
170
|
-
# shift_left_checked shift_right_checked
|
171
|
-
|
172
152
|
# [Binary element-wise with operator]: vector.func(other) => vector
|
173
153
|
binary_element_wise_op = {
|
174
154
|
add: '+',
|
@@ -216,6 +196,23 @@ module RedAmber
|
|
216
196
|
[Vector.new(Array(other) * size), self]
|
217
197
|
end
|
218
198
|
|
199
|
+
# < Not implemented yet > ---
|
200
|
+
|
201
|
+
# option(s) required
|
202
|
+
# - index
|
203
|
+
|
204
|
+
# Returns other than value
|
205
|
+
# - mode
|
206
|
+
# - tdigest
|
207
|
+
|
208
|
+
# Functions with numerical range check (unary)
|
209
|
+
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
210
|
+
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
211
|
+
|
212
|
+
# Functions with numerical range check (binary)
|
213
|
+
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
214
|
+
# shift_left_checked shift_right_checked
|
215
|
+
|
219
216
|
# (array functions)
|
220
217
|
# dictionary_encode,
|
221
218
|
# partition_nth_indices,
|
@@ -25,7 +25,13 @@ module RedAmber
|
|
25
25
|
end
|
26
26
|
|
27
27
|
# TODO: support for option {null_selection_behavior: :drop}
|
28
|
-
def filter(*booleans)
|
28
|
+
def filter(*booleans, &block)
|
29
|
+
if block
|
30
|
+
raise VectorArgumentError, 'Must not specify both arguments and block.' unless booleans.empty?
|
31
|
+
|
32
|
+
booleans = [yield]
|
33
|
+
end
|
34
|
+
|
29
35
|
booleans.flatten!
|
30
36
|
return Vector.new([]) if booleans.empty?
|
31
37
|
|
@@ -46,6 +52,8 @@ module RedAmber
|
|
46
52
|
|
47
53
|
filter_by_array(boolean_array) # returns sub Vector
|
48
54
|
end
|
55
|
+
alias_method :select, :filter
|
56
|
+
alias_method :find_all, :filter
|
49
57
|
|
50
58
|
# @param indices
|
51
59
|
# @param booleans
|
@@ -82,16 +90,17 @@ module RedAmber
|
|
82
90
|
|
83
91
|
# @param values [Array, Arrow::Array, Vector]
|
84
92
|
def is_in(*values)
|
85
|
-
|
93
|
+
self_data = chunked? ? data.pack : data
|
94
|
+
|
86
95
|
array =
|
87
|
-
case values
|
88
|
-
|
89
|
-
values[0].
|
90
|
-
|
91
|
-
values
|
96
|
+
case values
|
97
|
+
in [Vector] | [Arrow::Array] | [Arrow::ChunkedArray]
|
98
|
+
values[0].to_a
|
99
|
+
else
|
100
|
+
Array(values).flatten
|
92
101
|
end
|
93
|
-
|
94
|
-
Vector.new(
|
102
|
+
|
103
|
+
Vector.new(self_data.is_in(array))
|
95
104
|
end
|
96
105
|
|
97
106
|
# Arrow's support required
|
@@ -8,9 +8,10 @@ module RedAmber
|
|
8
8
|
# Functions to make up some data (especially missing) for new data.
|
9
9
|
module VectorUpdatable
|
10
10
|
# Replace data
|
11
|
-
# @param arg [Array, Vector, Arrow::Array] index specifier
|
11
|
+
# @param arg [Array, Vector, Arrow::Array] index specifier or boolean
|
12
12
|
# @param replacer [Array, Vector, Arrow::Array] new data to replace with.
|
13
|
-
# @return [Vector] Replaced new Vector
|
13
|
+
# @return [Vector] Replaced new Vector.
|
14
|
+
# If arg contains no true value, return self.
|
14
15
|
def replace(args, replacer)
|
15
16
|
args =
|
16
17
|
case args
|
@@ -24,10 +25,12 @@ module RedAmber
|
|
24
25
|
replacer = Array(replacer)
|
25
26
|
return self if args.empty? || args[0].nil?
|
26
27
|
|
27
|
-
replacer = nil if replacer.empty?
|
28
28
|
vector = parse_to_vector(args)
|
29
|
+
replacer = nil if replacer.empty?
|
29
30
|
booleans =
|
30
31
|
if vector.boolean?
|
32
|
+
return self unless vector.any
|
33
|
+
|
31
34
|
vector
|
32
35
|
elsif vector.numeric?
|
33
36
|
replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
|
data/lib/red_amber/version.rb
CHANGED
data/lib/red_amber.rb
CHANGED
@@ -5,6 +5,7 @@ require 'arrow'
|
|
5
5
|
require_relative 'red_amber/helper'
|
6
6
|
require_relative 'red_amber/data_frame_displayable'
|
7
7
|
require_relative 'red_amber/data_frame_indexable'
|
8
|
+
require_relative 'red_amber/data_frame_loadsave'
|
8
9
|
require_relative 'red_amber/data_frame_reshaping'
|
9
10
|
require_relative 'red_amber/data_frame_selectable'
|
10
11
|
require_relative 'red_amber/data_frame_variable_operation'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -45,16 +45,25 @@ files:
|
|
45
45
|
- doc/CODE_OF_CONDUCT.md
|
46
46
|
- doc/DataFrame.md
|
47
47
|
- doc/Vector.md
|
48
|
-
- doc/examples_of_red_amber.ipynb
|
49
48
|
- doc/image/arrow_table_new.png
|
49
|
+
- doc/image/basic_verbs.png
|
50
50
|
- doc/image/dataframe/assign.png
|
51
|
+
- doc/image/dataframe/assign_operation.png
|
51
52
|
- doc/image/dataframe/drop.png
|
52
53
|
- doc/image/dataframe/pick.png
|
54
|
+
- doc/image/dataframe/pick_operation.png
|
53
55
|
- doc/image/dataframe/remove.png
|
54
56
|
- doc/image/dataframe/rename.png
|
57
|
+
- doc/image/dataframe/rename_operation.png
|
58
|
+
- doc/image/dataframe/reshaping_DataFrames.png
|
55
59
|
- doc/image/dataframe/slice.png
|
60
|
+
- doc/image/dataframe/slice_operation.png
|
56
61
|
- doc/image/dataframe_model.png
|
57
62
|
- doc/image/example_in_red_arrow.png
|
63
|
+
- doc/image/group_operation.png
|
64
|
+
- doc/image/replace-if_then.png
|
65
|
+
- doc/image/reshaping_dataframe.png
|
66
|
+
- doc/image/screenshot.png
|
58
67
|
- doc/image/tdr.png
|
59
68
|
- doc/image/tdr_and_table.png
|
60
69
|
- doc/image/tidy_data_in_TDR.png
|
@@ -69,6 +78,7 @@ files:
|
|
69
78
|
- lib/red_amber/data_frame.rb
|
70
79
|
- lib/red_amber/data_frame_displayable.rb
|
71
80
|
- lib/red_amber/data_frame_indexable.rb
|
81
|
+
- lib/red_amber/data_frame_loadsave.rb
|
72
82
|
- lib/red_amber/data_frame_reshaping.rb
|
73
83
|
- lib/red_amber/data_frame_selectable.rb
|
74
84
|
- lib/red_amber/data_frame_variable_operation.rb
|