red_amber 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/CHANGELOG.md +125 -0
- data/README.md +86 -269
- data/doc/DataFrame.md +427 -281
- data/doc/Vector.md +35 -54
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +33 -41
- data/lib/red_amber/data_frame_displayable.rb +59 -6
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +12 -10
- data/lib/red_amber/data_frame_selectable.rb +53 -9
- data/lib/red_amber/data_frame_variable_operation.rb +57 -20
- data/lib/red_amber/group.rb +5 -3
- data/lib/red_amber/helper.rb +20 -18
- data/lib/red_amber/vector.rb +50 -31
- data/lib/red_amber/vector_functions.rb +21 -24
- data/lib/red_amber/vector_selectable.rb +18 -9
- data/lib/red_amber/vector_updatable.rb +6 -3
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -0
- metadata +13 -3
- data/doc/examples_of_red_amber.ipynb +0 -6783
@@ -3,8 +3,8 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# mix-in for the class DataFrame
|
5
5
|
module DataFrameSelectable
|
6
|
-
# select
|
7
|
-
# select
|
6
|
+
# select columns: [symbol] or [string]
|
7
|
+
# select rows: [array of index], [range]
|
8
8
|
def [](*args)
|
9
9
|
args.flatten!
|
10
10
|
raise DataFrameArgumentError, 'Empty dataframe' if empty?
|
@@ -22,17 +22,17 @@ module RedAmber
|
|
22
22
|
raise DataFrameArgumentError, "Invalid argument: #{args}"
|
23
23
|
end
|
24
24
|
|
25
|
-
# slice and select
|
25
|
+
# slice and select rows to create sub DataFrame
|
26
26
|
def slice(*args, &block)
|
27
27
|
slicer = args
|
28
28
|
if block
|
29
29
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
30
30
|
|
31
|
-
slicer = instance_eval(&block)
|
31
|
+
slicer = [instance_eval(&block)]
|
32
32
|
end
|
33
|
-
slicer
|
33
|
+
slicer.flatten!
|
34
34
|
|
35
|
-
raise DataFrameArgumentError, '
|
35
|
+
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
36
36
|
return remove_all_values if slicer.empty? || slicer[0].nil?
|
37
37
|
|
38
38
|
vector = parse_to_vector(slicer)
|
@@ -46,15 +46,59 @@ module RedAmber
|
|
46
46
|
raise DataFrameArgumentError, "Invalid argument #{slicer}"
|
47
47
|
end
|
48
48
|
|
49
|
-
|
49
|
+
def slice_by(key, keep_key: false, &block)
|
50
|
+
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
51
|
+
raise DataFrameArgumentError, 'No block given' unless block
|
52
|
+
raise DataFrameArgumentError, "#{key} is no a key of self" unless key?(key)
|
53
|
+
return self if key.nil?
|
54
|
+
|
55
|
+
slicer = instance_eval(&block)
|
56
|
+
return DataFrame.new unless slicer
|
57
|
+
|
58
|
+
if slicer.is_a?(Range)
|
59
|
+
from = slicer.begin
|
60
|
+
from =
|
61
|
+
if from.is_a?(String)
|
62
|
+
self[key].index(from)
|
63
|
+
elsif from.nil?
|
64
|
+
0
|
65
|
+
elsif from < 0
|
66
|
+
size + from
|
67
|
+
else
|
68
|
+
from
|
69
|
+
end
|
70
|
+
to = slicer.end
|
71
|
+
to =
|
72
|
+
if to.is_a?(String)
|
73
|
+
self[key].index(to)
|
74
|
+
elsif to.nil?
|
75
|
+
size - 1
|
76
|
+
elsif to < 0
|
77
|
+
size + to
|
78
|
+
else
|
79
|
+
to
|
80
|
+
end
|
81
|
+
slicer = (from..to).to_a
|
82
|
+
else
|
83
|
+
slicer = slicer.map { |x| x.is_a?(String) ? self[key].index(x) : x }
|
84
|
+
end
|
85
|
+
|
86
|
+
if keep_key
|
87
|
+
take(slicer)
|
88
|
+
else
|
89
|
+
take(slicer).drop(key)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# remove selected rows to create remainder DataFrame
|
50
94
|
def remove(*args, &block)
|
51
95
|
remover = args
|
52
96
|
if block
|
53
97
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
54
98
|
|
55
|
-
remover = instance_eval(&block)
|
99
|
+
remover = [instance_eval(&block)]
|
56
100
|
end
|
57
|
-
remover
|
101
|
+
remover.flatten!
|
58
102
|
|
59
103
|
raise DataFrameArgumentError, 'Empty dataframe' if empty?
|
60
104
|
return self if remover.empty? || remover[0].nil?
|
@@ -9,18 +9,28 @@ module RedAmber
|
|
9
9
|
if block
|
10
10
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
11
11
|
|
12
|
-
picker = instance_eval(&block)
|
12
|
+
picker = [instance_eval(&block)]
|
13
13
|
end
|
14
|
-
picker
|
14
|
+
picker.flatten!
|
15
15
|
return DataFrame.new if picker.empty? || picker == [nil]
|
16
16
|
|
17
|
-
|
17
|
+
key_vector = Vector.new(keys)
|
18
|
+
vec = parse_to_vector(picker, vsize: n_keys)
|
19
|
+
|
20
|
+
ary =
|
21
|
+
if vec.boolean?
|
22
|
+
key_vector.filter(*vec).to_a
|
23
|
+
elsif vec.numeric?
|
24
|
+
key_vector.take(*vec).to_a
|
25
|
+
elsif vec.string? || vec.dictionary?
|
26
|
+
picker
|
27
|
+
else
|
28
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
29
|
+
end
|
18
30
|
|
19
31
|
# DataFrame#[] creates a Vector when a single key is specified.
|
20
32
|
# DataFrame#pick creates a DataFrame with single key.
|
21
|
-
|
22
|
-
|
23
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
33
|
+
DataFrame.new(@table[ary])
|
24
34
|
end
|
25
35
|
|
26
36
|
# drop some variables to create remainder sub DataFrame
|
@@ -29,19 +39,29 @@ module RedAmber
|
|
29
39
|
if block
|
30
40
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
31
41
|
|
32
|
-
dropper = instance_eval(&block)
|
42
|
+
dropper = [instance_eval(&block)]
|
33
43
|
end
|
34
|
-
dropper
|
35
|
-
|
44
|
+
dropper.flatten!
|
45
|
+
|
46
|
+
key_vector = Vector.new(keys)
|
47
|
+
vec = parse_to_vector(dropper, vsize: n_keys)
|
48
|
+
|
49
|
+
ary =
|
50
|
+
if vec.boolean?
|
51
|
+
key_vector.filter(*vec.primitive_invert).each.map(&:to_sym) # Array
|
52
|
+
elsif vec.numeric?
|
53
|
+
keys - key_vector.take(*vec).each.map(&:to_sym) # Array
|
54
|
+
elsif vec.string? || vec.dictionary?
|
55
|
+
keys - dropper
|
56
|
+
else
|
57
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
58
|
+
end
|
36
59
|
|
37
|
-
|
38
|
-
return DataFrame.new if picker.empty?
|
60
|
+
return DataFrame.new if ary.empty?
|
39
61
|
|
40
62
|
# DataFrame#[] creates a Vector when a single key is specified.
|
41
63
|
# DataFrame#drop creates a DataFrame with single key.
|
42
|
-
|
43
|
-
|
44
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
64
|
+
DataFrame.new(@table[ary])
|
45
65
|
end
|
46
66
|
|
47
67
|
# rename variables to create a new DataFrame
|
@@ -91,10 +111,20 @@ module RedAmber
|
|
91
111
|
|
92
112
|
def assign_update(*assigner, &block)
|
93
113
|
if block
|
94
|
-
|
95
|
-
|
96
|
-
|
114
|
+
assigner_from_block = instance_eval(&block)
|
115
|
+
assigner =
|
116
|
+
if assigner.empty?
|
117
|
+
# block only
|
118
|
+
[assigner_from_block]
|
119
|
+
# If Ruby >= 3.0, one line pattern match can be used
|
120
|
+
# assigner_from_block in [Array, *]
|
121
|
+
elsif multiple_assigner?(assigner_from_block)
|
122
|
+
assigner.zip(assigner_from_block)
|
123
|
+
else
|
124
|
+
assigner.zip([assigner_from_block])
|
125
|
+
end
|
97
126
|
end
|
127
|
+
|
98
128
|
case assigner
|
99
129
|
in [] | [nil] | [{}] | [[]]
|
100
130
|
return self
|
@@ -113,6 +143,8 @@ module RedAmber
|
|
113
143
|
updater = {}
|
114
144
|
appender = {}
|
115
145
|
key_array_pairs.each do |key, array|
|
146
|
+
raise DataFrameArgumentError, "Empty column data: #{key} => nil" if array.nil?
|
147
|
+
|
116
148
|
if keys.include? key
|
117
149
|
updater[key] = array
|
118
150
|
else
|
@@ -153,7 +185,7 @@ module RedAmber
|
|
153
185
|
data = updater[key]
|
154
186
|
next unless data
|
155
187
|
|
156
|
-
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
|
188
|
+
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.nil? || data.size != size
|
157
189
|
|
158
190
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
159
191
|
fields[i] = Arrow::Field.new(key, a.value_data_type)
|
@@ -179,8 +211,13 @@ module RedAmber
|
|
179
211
|
end
|
180
212
|
end
|
181
213
|
|
182
|
-
def
|
183
|
-
|
214
|
+
def multiple_assigner?(assigner)
|
215
|
+
case assigner
|
216
|
+
in [Vector, *] | [Array, *] | [Arrow::Array, *]
|
217
|
+
true
|
218
|
+
else
|
219
|
+
false
|
220
|
+
end
|
184
221
|
end
|
185
222
|
end
|
186
223
|
end
|
data/lib/red_amber/group.rb
CHANGED
@@ -54,9 +54,11 @@ module RedAmber
|
|
54
54
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
|
55
55
|
|
56
56
|
df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
57
|
-
df = df
|
58
|
-
# if counts are the same (
|
59
|
-
|
57
|
+
df = df.pick(@group_keys, df.keys - @group_keys)
|
58
|
+
# if counts are the same (and do not include NaN or nil), aggregate count columns.
|
59
|
+
if func == :count && df.pick(@group_keys.size..).to_h.values.uniq.size == 1
|
60
|
+
df = df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
|
61
|
+
end
|
60
62
|
df
|
61
63
|
end
|
62
64
|
end
|
data/lib/red_amber/helper.rb
CHANGED
@@ -17,10 +17,6 @@ module RedAmber
|
|
17
17
|
enum.all?(Integer)
|
18
18
|
end
|
19
19
|
|
20
|
-
def sym_or_str?(enum)
|
21
|
-
enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
|
22
|
-
end
|
23
|
-
|
24
20
|
def booleans?(enum)
|
25
21
|
enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
|
26
22
|
end
|
@@ -29,32 +25,38 @@ module RedAmber
|
|
29
25
|
DataFrame.new(key => vector.data)
|
30
26
|
end
|
31
27
|
|
32
|
-
def parse_to_vector(args)
|
28
|
+
def parse_to_vector(args, vsize: size)
|
33
29
|
a = args.reduce([]) do |accum, elem|
|
34
|
-
accum.concat(normalize_element(elem))
|
30
|
+
accum.concat(normalize_element(elem, vsize: vsize))
|
35
31
|
end
|
36
32
|
Vector.new(a)
|
37
33
|
end
|
38
34
|
|
39
|
-
def normalize_element(elem)
|
35
|
+
def normalize_element(elem, vsize: size)
|
40
36
|
case elem
|
41
|
-
when
|
42
|
-
[
|
37
|
+
when NilClass
|
38
|
+
[nil]
|
43
39
|
when Range
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
40
|
+
bg = elem.begin
|
41
|
+
en = elem.end
|
42
|
+
if [bg, en].any?(Integer)
|
43
|
+
bg += vsize if bg&.negative?
|
44
|
+
en += vsize if en&.negative?
|
45
|
+
en -= 1 if en.is_a?(Integer) && elem.exclude_end?
|
46
|
+
if bg&.negative? || (en && en >= vsize)
|
47
|
+
raise DataFrameArgumentError, "Index out of range: #{elem} for 0..#{vsize - 1}"
|
50
48
|
end
|
51
49
|
|
52
|
-
(0...
|
50
|
+
Array(0...vsize)[elem]
|
51
|
+
elsif bg.nil? && en.nil?
|
52
|
+
Array(0...vsize)
|
53
53
|
else
|
54
|
-
elem
|
54
|
+
Array[elem]
|
55
55
|
end
|
56
|
+
when Enumerator
|
57
|
+
elem.to_a
|
56
58
|
else
|
57
|
-
Array
|
59
|
+
Array[elem]
|
58
60
|
end
|
59
61
|
end
|
60
62
|
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -11,27 +11,28 @@ module RedAmber
|
|
11
11
|
include Helper
|
12
12
|
|
13
13
|
def initialize(*array)
|
14
|
-
@key = nil # default is 'headless'
|
15
|
-
if array.empty? || array
|
14
|
+
@key = nil # default is 'headless' Vector
|
15
|
+
if array.empty? || array.first.nil?
|
16
16
|
Vector.new([])
|
17
17
|
else
|
18
18
|
array.flatten!
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
19
|
+
@data =
|
20
|
+
case array
|
21
|
+
in [Vector => v]
|
22
|
+
v.data
|
23
|
+
in [Arrow::Array => a]
|
24
|
+
a
|
25
|
+
in [Arrow::ChunkedArray => ca]
|
26
|
+
ca
|
27
|
+
in [Range => r]
|
28
|
+
Arrow::Array.new(Array(r))
|
29
|
+
else
|
30
|
+
begin
|
31
|
+
Arrow::Array.new(Array(array))
|
32
|
+
rescue Error
|
33
|
+
raise VectorArgumentError, "Invalid argument: #{array}"
|
34
|
+
end
|
35
|
+
end
|
35
36
|
end
|
36
37
|
end
|
37
38
|
|
@@ -43,19 +44,24 @@ module RedAmber
|
|
43
44
|
end
|
44
45
|
|
45
46
|
def inspect(limit: 80)
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
s
|
53
|
-
|
47
|
+
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
|
48
|
+
# Better performance than `.upcase == 'MINIMUM'`
|
49
|
+
"#{self.class}(:#{type}, size=#{size})"
|
50
|
+
else
|
51
|
+
sio = StringIO.new << '['
|
52
|
+
to_a.each_with_object(sio).with_index do |(e, s), i|
|
53
|
+
next_str = "#{s.size > 1 ? ', ' : ''}#{e.inspect}"
|
54
|
+
if (s.size + next_str.size) < limit
|
55
|
+
s << next_str
|
56
|
+
else
|
57
|
+
s << ', ... ' if i < size
|
58
|
+
break
|
59
|
+
end
|
54
60
|
end
|
55
|
-
|
56
|
-
sio << ']'
|
61
|
+
sio << ']'
|
57
62
|
|
58
|
-
|
63
|
+
format "#<#{self.class}(:#{type}, size=#{size}):0x%016x>\n%s\n", object_id, sio.string
|
64
|
+
end
|
59
65
|
end
|
60
66
|
|
61
67
|
def values
|
@@ -71,7 +77,7 @@ module RedAmber
|
|
71
77
|
alias_method :indeces, :indices
|
72
78
|
|
73
79
|
def to_ary
|
74
|
-
|
80
|
+
values
|
75
81
|
end
|
76
82
|
|
77
83
|
def size
|
@@ -110,6 +116,10 @@ module RedAmber
|
|
110
116
|
type_class == Arrow::StringDataType
|
111
117
|
end
|
112
118
|
|
119
|
+
def dictionary?
|
120
|
+
type_class == Arrow::DictionaryDataType
|
121
|
+
end
|
122
|
+
|
113
123
|
def temporal?
|
114
124
|
type_class < Arrow::TemporalDataType
|
115
125
|
end
|
@@ -122,14 +132,23 @@ module RedAmber
|
|
122
132
|
return enum_for(:each) unless block_given?
|
123
133
|
|
124
134
|
size.times do |i|
|
125
|
-
yield
|
135
|
+
yield data[i]
|
126
136
|
end
|
127
137
|
end
|
128
138
|
|
139
|
+
def map(&block)
|
140
|
+
return enum_for(:map) unless block
|
141
|
+
|
142
|
+
Vector.new(to_a.map(&block))
|
143
|
+
end
|
144
|
+
alias_method :collect, :map
|
145
|
+
|
146
|
+
# undocumented
|
129
147
|
def chunked?
|
130
148
|
@data.is_a? Arrow::ChunkedArray
|
131
149
|
end
|
132
150
|
|
151
|
+
# undocumented
|
133
152
|
def n_chunks
|
134
153
|
chunked? ? @data.n_chunks : 0
|
135
154
|
end
|
@@ -34,13 +34,6 @@ module RedAmber
|
|
34
34
|
end
|
35
35
|
alias_method :std, :sd
|
36
36
|
|
37
|
-
# option(s) required
|
38
|
-
# - index
|
39
|
-
|
40
|
-
# Returns other than value
|
41
|
-
# - mode
|
42
|
-
# - tdigest
|
43
|
-
|
44
37
|
# Return quantile
|
45
38
|
# 0.5 quantile (median) is returned by default.
|
46
39
|
# Or return quantile for specified probability (prob).
|
@@ -88,8 +81,9 @@ module RedAmber
|
|
88
81
|
|
89
82
|
# [Unary element-wise]: vector.func => vector
|
90
83
|
unary_element_wise =
|
91
|
-
%i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward
|
92
|
-
is_inf is_nan is_null is_valid
|
84
|
+
%i[abs acos asin array_sort_indices atan bit_wise_not ceil cos fill_null_backward \
|
85
|
+
fill_null_forward floor is_finite is_inf is_nan is_null is_valid ln log10 log1p log2 \
|
86
|
+
round round_to_multiple sign sin tan trunc unique]
|
93
87
|
unary_element_wise.each do |function|
|
94
88
|
define_method(function) do |**options|
|
95
89
|
datum = exec_func_unary(function, options)
|
@@ -129,16 +123,9 @@ module RedAmber
|
|
129
123
|
end
|
130
124
|
alias_method :not, :invert
|
131
125
|
|
132
|
-
# NaN support needed
|
133
|
-
# - acos asin ln log10 log1p log2
|
134
|
-
|
135
|
-
# Functions with numerical range check
|
136
|
-
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
137
|
-
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
138
|
-
|
139
126
|
# [Binary element-wise]: vector.func(other) => vector
|
140
127
|
binary_element_wise =
|
141
|
-
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
|
128
|
+
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
|
142
129
|
binary_element_wise.each do |function|
|
143
130
|
define_method(function) do |other, **options|
|
144
131
|
datum = exec_func_binary(function, other, options)
|
@@ -162,13 +149,6 @@ module RedAmber
|
|
162
149
|
end
|
163
150
|
end
|
164
151
|
|
165
|
-
# NaN support needed
|
166
|
-
# - logb
|
167
|
-
|
168
|
-
# Functions with numerical range check
|
169
|
-
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
170
|
-
# shift_left_checked shift_right_checked
|
171
|
-
|
172
152
|
# [Binary element-wise with operator]: vector.func(other) => vector
|
173
153
|
binary_element_wise_op = {
|
174
154
|
add: '+',
|
@@ -216,6 +196,23 @@ module RedAmber
|
|
216
196
|
[Vector.new(Array(other) * size), self]
|
217
197
|
end
|
218
198
|
|
199
|
+
# < Not implemented yet > ---
|
200
|
+
|
201
|
+
# option(s) required
|
202
|
+
# - index
|
203
|
+
|
204
|
+
# Returns other than value
|
205
|
+
# - mode
|
206
|
+
# - tdigest
|
207
|
+
|
208
|
+
# Functions with numerical range check (unary)
|
209
|
+
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
210
|
+
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
211
|
+
|
212
|
+
# Functions with numerical range check (binary)
|
213
|
+
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
214
|
+
# shift_left_checked shift_right_checked
|
215
|
+
|
219
216
|
# (array functions)
|
220
217
|
# dictionary_encode,
|
221
218
|
# partition_nth_indices,
|
@@ -25,7 +25,13 @@ module RedAmber
|
|
25
25
|
end
|
26
26
|
|
27
27
|
# TODO: support for option {null_selection_behavior: :drop}
|
28
|
-
def filter(*booleans)
|
28
|
+
def filter(*booleans, &block)
|
29
|
+
if block
|
30
|
+
raise VectorArgumentError, 'Must not specify both arguments and block.' unless booleans.empty?
|
31
|
+
|
32
|
+
booleans = [yield]
|
33
|
+
end
|
34
|
+
|
29
35
|
booleans.flatten!
|
30
36
|
return Vector.new([]) if booleans.empty?
|
31
37
|
|
@@ -46,6 +52,8 @@ module RedAmber
|
|
46
52
|
|
47
53
|
filter_by_array(boolean_array) # returns sub Vector
|
48
54
|
end
|
55
|
+
alias_method :select, :filter
|
56
|
+
alias_method :find_all, :filter
|
49
57
|
|
50
58
|
# @param indices
|
51
59
|
# @param booleans
|
@@ -82,16 +90,17 @@ module RedAmber
|
|
82
90
|
|
83
91
|
# @param values [Array, Arrow::Array, Vector]
|
84
92
|
def is_in(*values)
|
85
|
-
|
93
|
+
self_data = chunked? ? data.pack : data
|
94
|
+
|
86
95
|
array =
|
87
|
-
case values
|
88
|
-
|
89
|
-
values[0].
|
90
|
-
|
91
|
-
values
|
96
|
+
case values
|
97
|
+
in [Vector] | [Arrow::Array] | [Arrow::ChunkedArray]
|
98
|
+
values[0].to_a
|
99
|
+
else
|
100
|
+
Array(values).flatten
|
92
101
|
end
|
93
|
-
|
94
|
-
Vector.new(
|
102
|
+
|
103
|
+
Vector.new(self_data.is_in(array))
|
95
104
|
end
|
96
105
|
|
97
106
|
# Arrow's support required
|
@@ -8,9 +8,10 @@ module RedAmber
|
|
8
8
|
# Functions to make up some data (especially missing) for new data.
|
9
9
|
module VectorUpdatable
|
10
10
|
# Replace data
|
11
|
-
# @param arg [Array, Vector, Arrow::Array] index specifier
|
11
|
+
# @param arg [Array, Vector, Arrow::Array] index specifier or boolean
|
12
12
|
# @param replacer [Array, Vector, Arrow::Array] new data to replace for.
|
13
|
-
# @return [Vector] Replaced new Vector
|
13
|
+
# @return [Vector] Replaced new Vector.
|
14
|
+
# If arg has no true, return self.
|
14
15
|
def replace(args, replacer)
|
15
16
|
args =
|
16
17
|
case args
|
@@ -24,10 +25,12 @@ module RedAmber
|
|
24
25
|
replacer = Array(replacer)
|
25
26
|
return self if args.empty? || args[0].nil?
|
26
27
|
|
27
|
-
replacer = nil if replacer.empty?
|
28
28
|
vector = parse_to_vector(args)
|
29
|
+
replacer = nil if replacer.empty?
|
29
30
|
booleans =
|
30
31
|
if vector.boolean?
|
32
|
+
return self unless vector.any
|
33
|
+
|
31
34
|
vector
|
32
35
|
elsif vector.numeric?
|
33
36
|
replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
|
data/lib/red_amber/version.rb
CHANGED
data/lib/red_amber.rb
CHANGED
@@ -5,6 +5,7 @@ require 'arrow'
|
|
5
5
|
require_relative 'red_amber/helper'
|
6
6
|
require_relative 'red_amber/data_frame_displayable'
|
7
7
|
require_relative 'red_amber/data_frame_indexable'
|
8
|
+
require_relative 'red_amber/data_frame_loadsave'
|
8
9
|
require_relative 'red_amber/data_frame_reshaping'
|
9
10
|
require_relative 'red_amber/data_frame_selectable'
|
10
11
|
require_relative 'red_amber/data_frame_variable_operation'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -45,16 +45,25 @@ files:
|
|
45
45
|
- doc/CODE_OF_CONDUCT.md
|
46
46
|
- doc/DataFrame.md
|
47
47
|
- doc/Vector.md
|
48
|
-
- doc/examples_of_red_amber.ipynb
|
49
48
|
- doc/image/arrow_table_new.png
|
49
|
+
- doc/image/basic_verbs.png
|
50
50
|
- doc/image/dataframe/assign.png
|
51
|
+
- doc/image/dataframe/assign_operation.png
|
51
52
|
- doc/image/dataframe/drop.png
|
52
53
|
- doc/image/dataframe/pick.png
|
54
|
+
- doc/image/dataframe/pick_operation.png
|
53
55
|
- doc/image/dataframe/remove.png
|
54
56
|
- doc/image/dataframe/rename.png
|
57
|
+
- doc/image/dataframe/rename_operation.png
|
58
|
+
- doc/image/dataframe/reshaping_DataFrames.png
|
55
59
|
- doc/image/dataframe/slice.png
|
60
|
+
- doc/image/dataframe/slice_operation.png
|
56
61
|
- doc/image/dataframe_model.png
|
57
62
|
- doc/image/example_in_red_arrow.png
|
63
|
+
- doc/image/group_operation.png
|
64
|
+
- doc/image/replace-if_then.png
|
65
|
+
- doc/image/reshaping_dataframe.png
|
66
|
+
- doc/image/screenshot.png
|
58
67
|
- doc/image/tdr.png
|
59
68
|
- doc/image/tdr_and_table.png
|
60
69
|
- doc/image/tidy_data_in_TDR.png
|
@@ -69,6 +78,7 @@ files:
|
|
69
78
|
- lib/red_amber/data_frame.rb
|
70
79
|
- lib/red_amber/data_frame_displayable.rb
|
71
80
|
- lib/red_amber/data_frame_indexable.rb
|
81
|
+
- lib/red_amber/data_frame_loadsave.rb
|
72
82
|
- lib/red_amber/data_frame_reshaping.rb
|
73
83
|
- lib/red_amber/data_frame_selectable.rb
|
74
84
|
- lib/red_amber/data_frame_variable_operation.rb
|