red_amber 0.1.7 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +12 -2
- data/.rubocop_todo.yml +2 -15
- data/.yardopts +1 -0
- data/CHANGELOG.md +164 -2
- data/Gemfile +2 -1
- data/README.md +246 -17
- data/doc/DataFrame.md +392 -129
- data/doc/Vector.md +37 -19
- data/doc/examples_of_red_amber.ipynb +8979 -0
- data/lib/red_amber/data_frame.rb +138 -24
- data/lib/red_amber/data_frame_displayable.rb +35 -18
- data/lib/red_amber/data_frame_reshaping.rb +85 -0
- data/lib/red_amber/data_frame_selectable.rb +53 -9
- data/lib/red_amber/data_frame_variable_operation.rb +130 -50
- data/lib/red_amber/group.rb +29 -27
- data/lib/red_amber/vector.rb +1 -1
- data/lib/red_amber/vector_functions.rb +65 -23
- data/lib/red_amber/vector_selectable.rb +12 -9
- data/lib/red_amber/vector_updatable.rb +22 -1
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -1
- data/red_amber.gemspec +1 -1
- metadata +7 -5
- data/doc/47_examples_of_red_amber.ipynb +0 -4872
@@ -9,12 +9,16 @@ module RedAmber
|
|
9
9
|
if block
|
10
10
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
11
11
|
|
12
|
-
picker = instance_eval(&block)
|
12
|
+
picker = [instance_eval(&block)]
|
13
13
|
end
|
14
|
-
picker
|
14
|
+
picker.flatten!
|
15
15
|
return DataFrame.new if picker.empty? || picker == [nil]
|
16
16
|
|
17
|
-
|
17
|
+
key_vector = Vector.new(keys)
|
18
|
+
picker_vector = parse_to_vector(picker)
|
19
|
+
|
20
|
+
picker = key_vector.filter(*picker_vector).to_a if picker_vector.boolean?
|
21
|
+
picker = key_vector.take(*picker_vector).to_a if picker_vector.numeric?
|
18
22
|
|
19
23
|
# DataFrame#[] creates a Vector with single key is specified.
|
20
24
|
# DataFrame#pick creates a DataFrame with single key.
|
@@ -29,12 +33,22 @@ module RedAmber
|
|
29
33
|
if block
|
30
34
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
31
35
|
|
32
|
-
dropper = instance_eval(&block)
|
36
|
+
dropper = [instance_eval(&block)]
|
33
37
|
end
|
34
|
-
dropper
|
35
|
-
|
38
|
+
dropper.flatten!
|
39
|
+
|
40
|
+
key_vector = Vector.new(keys)
|
41
|
+
dropper_vector = parse_to_vector(dropper)
|
42
|
+
|
43
|
+
picker =
|
44
|
+
if dropper_vector.boolean?
|
45
|
+
key_vector.filter(*dropper_vector.primitive_invert).each.map(&:to_sym)
|
46
|
+
elsif dropper_vector.numeric?
|
47
|
+
keys - key_vector.take(*dropper_vector).each.map(&:to_sym)
|
48
|
+
else
|
49
|
+
keys - dropper
|
50
|
+
end
|
36
51
|
|
37
|
-
picker = keys - dropper
|
38
52
|
return DataFrame.new if picker.empty?
|
39
53
|
|
40
54
|
# DataFrame#[] creates a Vector with single key is specified.
|
@@ -44,64 +58,118 @@ module RedAmber
|
|
44
58
|
raise DataFrameArgumentError, "Invalid argument #{args}"
|
45
59
|
end
|
46
60
|
|
47
|
-
# rename variables to create new DataFrame
|
48
|
-
def rename(*
|
49
|
-
renamer = args
|
61
|
+
# rename variables to create a new DataFrame
|
62
|
+
def rename(*renamer, &block)
|
50
63
|
if block
|
51
|
-
raise DataFrameArgumentError, 'Must not specify both arguments and a block' unless
|
64
|
+
raise DataFrameArgumentError, 'Must not specify both arguments and a block' unless renamer.empty?
|
52
65
|
|
53
|
-
renamer = instance_eval(&block)
|
66
|
+
renamer = [instance_eval(&block)]
|
67
|
+
end
|
68
|
+
case renamer
|
69
|
+
in [] | [nil] | [{}] | [[]]
|
70
|
+
return self
|
71
|
+
in [Hash => key_pairs]
|
72
|
+
# noop
|
73
|
+
in [ (Symbol | String) => from, (Symbol | String) => to]
|
74
|
+
key_pairs = { from => to }
|
75
|
+
in [Array => array_in_array]
|
76
|
+
key_pairs = try_convert_to_hash(array_in_array)
|
77
|
+
in [Array, *] => array_in_array1
|
78
|
+
key_pairs = try_convert_to_hash(array_in_array1)
|
79
|
+
else
|
80
|
+
raise DataFrameArgumentError, "Invalid argument #{renamer}"
|
54
81
|
end
|
55
|
-
|
56
|
-
|
82
|
+
rename_by_hash(key_pairs)
|
83
|
+
end
|
57
84
|
|
58
|
-
|
59
|
-
|
85
|
+
# assign variables to create a new DataFrame
|
86
|
+
def assign(*assigner, &block)
|
87
|
+
appender, fields, arrays = assign_update(*assigner, &block)
|
88
|
+
return self if appender.is_a?(DataFrame)
|
60
89
|
|
61
|
-
|
90
|
+
append_to_fields_and_arrays(appender, fields, arrays, append_to_left: false) unless appender.empty?
|
91
|
+
|
92
|
+
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
62
93
|
end
|
63
94
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
95
|
+
def assign_left(*assigner, &block)
|
96
|
+
appender, fields, arrays = assign_update(*assigner, &block)
|
97
|
+
return self if appender.is_a?(DataFrame)
|
98
|
+
|
99
|
+
append_to_fields_and_arrays(appender, fields, arrays, append_to_left: true) unless appender.empty?
|
69
100
|
|
70
|
-
|
101
|
+
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
102
|
+
end
|
103
|
+
|
104
|
+
private
|
105
|
+
|
106
|
+
def assign_update(*assigner, &block)
|
107
|
+
if block
|
108
|
+
assigner_from_block = instance_eval(&block)
|
109
|
+
assigner =
|
110
|
+
if assigner.empty?
|
111
|
+
# block only
|
112
|
+
[assigner_from_block]
|
113
|
+
# If Ruby >= 3.0, one line pattern match can be used
|
114
|
+
# assigner_from_block in [Array, *]
|
115
|
+
elsif multiple_assigner?(assigner_from_block)
|
116
|
+
assigner.zip(assigner_from_block)
|
117
|
+
else
|
118
|
+
assigner.zip([assigner_from_block])
|
119
|
+
end
|
71
120
|
end
|
72
|
-
assigner = [assigner].flatten
|
73
|
-
return self if assigner.empty? || assigner == [nil]
|
74
121
|
|
75
|
-
|
122
|
+
case assigner
|
123
|
+
in [] | [nil] | [{}] | [[]]
|
124
|
+
return self
|
125
|
+
in [Hash => key_array_pairs]
|
126
|
+
# noop
|
127
|
+
in [(Symbol | String) => key, (Vector | Array | Arrow::Array) => array]
|
128
|
+
key_array_pairs = { key => array }
|
129
|
+
in [Array => array_in_array]
|
130
|
+
key_array_pairs = try_convert_to_hash(array_in_array)
|
131
|
+
in [Array, *] => array_in_array1
|
132
|
+
key_array_pairs = try_convert_to_hash(array_in_array1)
|
133
|
+
else
|
134
|
+
raise DataFrameArgumentError, "Invalid argument #{assigner}"
|
135
|
+
end
|
76
136
|
|
77
137
|
updater = {}
|
78
138
|
appender = {}
|
79
|
-
|
139
|
+
key_array_pairs.each do |key, array|
|
140
|
+
raise DataFrameArgumentError, "Empty column data: #{key} => nil" if array.nil?
|
141
|
+
|
80
142
|
if keys.include? key
|
81
|
-
updater[key] =
|
143
|
+
updater[key] = array
|
82
144
|
else
|
83
|
-
appender[key] =
|
145
|
+
appender[key] = array
|
84
146
|
end
|
85
147
|
end
|
86
|
-
|
87
|
-
append_to_fields_and_arrays(appender, fields, arrays) unless appender.empty?
|
88
|
-
|
89
|
-
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
148
|
+
[appender, *update_fields_and_arrays(updater)]
|
90
149
|
end
|
91
150
|
|
92
|
-
|
151
|
+
def try_convert_to_hash(array)
|
152
|
+
array.to_h
|
153
|
+
rescue TypeError
|
154
|
+
[array].to_h
|
155
|
+
rescue TypeError # rubocop:disable Lint/DuplicateRescueException
|
156
|
+
raise DataFrameArgumentError, "Invalid argument in Array #{array}"
|
157
|
+
end
|
93
158
|
|
94
159
|
def rename_by_hash(key_pairs)
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
160
|
+
not_existing_keys = key_pairs.keys - keys
|
161
|
+
raise DataFrameArgumentError, "Not existing: #{not_existing_keys}" unless not_existing_keys.empty?
|
162
|
+
|
163
|
+
fields =
|
164
|
+
keys.map do |key|
|
165
|
+
new_key = key_pairs[key]
|
166
|
+
if new_key
|
167
|
+
Arrow::Field.new(new_key.to_sym, @table[key].data_type)
|
168
|
+
else
|
169
|
+
@table.schema[key]
|
170
|
+
end
|
101
171
|
end
|
102
|
-
|
103
|
-
schema = Arrow::Schema.new(fields)
|
104
|
-
DataFrame.new(Arrow::Table.new(schema, @table.columns))
|
172
|
+
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), @table.columns))
|
105
173
|
end
|
106
174
|
|
107
175
|
def update_fields_and_arrays(updater)
|
@@ -111,7 +179,7 @@ module RedAmber
|
|
111
179
|
data = updater[key]
|
112
180
|
next unless data
|
113
181
|
|
114
|
-
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
|
182
|
+
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.nil? || data.size != size
|
115
183
|
|
116
184
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
117
185
|
fields[i] = Arrow::Field.new(key, a.value_data_type)
|
@@ -120,18 +188,30 @@ module RedAmber
|
|
120
188
|
[fields, arrays]
|
121
189
|
end
|
122
190
|
|
123
|
-
def append_to_fields_and_arrays(appender, fields, arrays)
|
124
|
-
appender.
|
191
|
+
def append_to_fields_and_arrays(appender, fields, arrays, append_to_left: false)
|
192
|
+
enum = append_to_left ? appender.reverse_each : appender.each
|
193
|
+
enum.each do |key, data|
|
125
194
|
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
|
126
195
|
|
127
196
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
128
|
-
|
129
|
-
|
197
|
+
|
198
|
+
if append_to_left
|
199
|
+
fields.unshift(Arrow::Field.new(key.to_sym, a.value_data_type))
|
200
|
+
arrays.unshift(Arrow::ChunkedArray.new([a]))
|
201
|
+
else
|
202
|
+
fields << Arrow::Field.new(key.to_sym, a.value_data_type)
|
203
|
+
arrays << Arrow::ChunkedArray.new([a])
|
204
|
+
end
|
130
205
|
end
|
131
206
|
end
|
132
207
|
|
133
|
-
def
|
134
|
-
|
208
|
+
def multiple_assigner?(assigner)
|
209
|
+
case assigner
|
210
|
+
in [Vector, *] | [Array, *] | [Arrow::Array, *]
|
211
|
+
true
|
212
|
+
else
|
213
|
+
false
|
214
|
+
end
|
135
215
|
end
|
136
216
|
end
|
137
217
|
end
|
data/lib/red_amber/group.rb
CHANGED
@@ -3,6 +3,10 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# group class
|
5
5
|
class Group
|
6
|
+
# Creates a new Group object.
|
7
|
+
#
|
8
|
+
# @param dataframe [DataFrame] dataframe to be grouped.
|
9
|
+
# @param group_keys [Array<>] keys for grouping.
|
6
10
|
def initialize(dataframe, *group_keys)
|
7
11
|
@dataframe = dataframe
|
8
12
|
@table = @dataframe.table
|
@@ -16,36 +20,30 @@ module RedAmber
|
|
16
20
|
@group = @table.group(*@group_keys)
|
17
21
|
end
|
18
22
|
|
19
|
-
|
20
|
-
|
23
|
+
functions = %i[count sum product mean min max stddev variance]
|
24
|
+
functions.each do |function|
|
25
|
+
define_method(function) do |*summary_keys|
|
26
|
+
by(function, summary_keys)
|
27
|
+
end
|
21
28
|
end
|
22
29
|
|
23
|
-
def
|
24
|
-
|
30
|
+
def inspect
|
31
|
+
tallys = @dataframe.pick(@group_keys).vectors.map.with_object({}) do |v, h|
|
32
|
+
h[v.key] = v.tally
|
33
|
+
end
|
34
|
+
"#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallys}>"
|
25
35
|
end
|
26
36
|
|
27
|
-
def
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
38
|
-
|
39
|
-
def max(*summary_keys)
|
40
|
-
by(:max, summary_keys)
|
41
|
-
end
|
42
|
-
|
43
|
-
def stddev(*summary_keys)
|
44
|
-
by(:stddev, summary_keys)
|
45
|
-
end
|
46
|
-
|
47
|
-
def variance(*summary_keys)
|
48
|
-
by(:variance, summary_keys)
|
37
|
+
def summarize(&block)
|
38
|
+
agg = instance_eval(&block)
|
39
|
+
case agg
|
40
|
+
when DataFrame
|
41
|
+
agg
|
42
|
+
when Array
|
43
|
+
agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
|
44
|
+
else
|
45
|
+
raise GroupArgumentError, "Unknown argument: #{agg}"
|
46
|
+
end
|
49
47
|
end
|
50
48
|
|
51
49
|
private
|
@@ -55,7 +53,11 @@ module RedAmber
|
|
55
53
|
d = summary_keys - @dataframe.keys
|
56
54
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
|
57
55
|
|
58
|
-
RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
56
|
+
df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
57
|
+
df = df[@group_keys, df.keys - @group_keys]
|
58
|
+
# if counts are the same (no nil included), aggregate count columns.
|
59
|
+
df = df[df.keys[0..1]].rename(df.keys[1], :count) if func == :count && df.to_h.values[1..].uniq.size == 1
|
60
|
+
df
|
59
61
|
end
|
60
62
|
end
|
61
63
|
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -34,18 +34,56 @@ module RedAmber
|
|
34
34
|
end
|
35
35
|
alias_method :std, :sd
|
36
36
|
|
37
|
-
#
|
38
|
-
#
|
37
|
+
# Return quantile
|
38
|
+
# 0.5 quantile (median) is returned by default.
|
39
|
+
# Or return quantile for specified probability (prob).
|
40
|
+
# If quantile lies between two data points, interpolated value is
|
41
|
+
# returned based on selected interpolation method.
|
42
|
+
# Nils and NaNs are ignored.
|
43
|
+
# Nil is returned if there are no valid data point.
|
44
|
+
#
|
45
|
+
# @param prob [Float] probability.
|
46
|
+
# @param interpolation [Symbol] specifies interpolation method to use,
|
47
|
+
# when the quantile lies between the data i and j.
|
48
|
+
# - Default value is :linear, which returns i + (j - i) * fraction.
|
49
|
+
# - :lower returns i.
|
50
|
+
# - :higher returns j.
|
51
|
+
# - :nearest returns i or j, whichever is closer.
|
52
|
+
# - :midpoint returns (i + j) / 2.
|
53
|
+
# @param skip_nils [Boolean] wheather to ignore nil.
|
54
|
+
# @param min_count [Integer] min count.
|
55
|
+
# @return [Float] quantile.
|
56
|
+
def quantile(prob = 0.5, interpolation: :linear, skip_nils: true, min_count: 0)
|
57
|
+
raise VectorArgumentError, "Invalid: probability #{prob} must be between 0 and 1" unless (0..1).cover? prob
|
58
|
+
|
59
|
+
datum = find(:quantile).execute([data],
|
60
|
+
q: prob,
|
61
|
+
interpolation: interpolation,
|
62
|
+
skip_nulls: skip_nils,
|
63
|
+
min_count: min_count)
|
64
|
+
datum.value.to_a.first
|
65
|
+
end
|
39
66
|
|
40
|
-
#
|
41
|
-
#
|
42
|
-
|
43
|
-
|
67
|
+
# Return quantiles in a DataFrame
|
68
|
+
#
|
69
|
+
def quantiles(probs = [1.0, 0.75, 0.5, 0.25, 0.0], interpolation: :linear, skip_nils: true, min_count: 0)
|
70
|
+
if probs.empty? || !probs.all? { |q| (0..1).cover?(q) }
|
71
|
+
raise VectorArgumentError, "Invarid probavilities #{probs}"
|
72
|
+
end
|
73
|
+
|
74
|
+
DataFrame.new(
|
75
|
+
probs: probs,
|
76
|
+
quantiles: probs.map do |q|
|
77
|
+
quantile(q, interpolation: interpolation, skip_nils: skip_nils, min_count: min_count)
|
78
|
+
end
|
79
|
+
)
|
80
|
+
end
|
44
81
|
|
45
82
|
# [Unary element-wise]: vector.func => vector
|
46
83
|
unary_element_wise =
|
47
|
-
%i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward
|
48
|
-
is_inf is_nan is_null is_valid
|
84
|
+
%i[abs acos asin array_sort_indices atan bit_wise_not ceil cos fill_null_backward \
|
85
|
+
fill_null_forward floor is_finite is_inf is_nan is_null is_valid ln log10 log1p log2 \
|
86
|
+
round round_to_multiple sign sin tan trunc unique]
|
49
87
|
unary_element_wise.each do |function|
|
50
88
|
define_method(function) do |**options|
|
51
89
|
datum = exec_func_unary(function, options)
|
@@ -63,6 +101,7 @@ module RedAmber
|
|
63
101
|
|
64
102
|
alias_method :sort_indexes, :array_sort_indices
|
65
103
|
alias_method :sort_indices, :array_sort_indices
|
104
|
+
alias_method :sort_index, :array_sort_indices
|
66
105
|
|
67
106
|
alias_method :uniq, :unique
|
68
107
|
|
@@ -84,16 +123,9 @@ module RedAmber
|
|
84
123
|
end
|
85
124
|
alias_method :not, :invert
|
86
125
|
|
87
|
-
# NaN support needed
|
88
|
-
# - acos asin ln log10 log1p log2
|
89
|
-
|
90
|
-
# Functions with numerical range check
|
91
|
-
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
92
|
-
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
93
|
-
|
94
126
|
# [Binary element-wise]: vector.func(other) => vector
|
95
127
|
binary_element_wise =
|
96
|
-
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
|
128
|
+
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
|
97
129
|
binary_element_wise.each do |function|
|
98
130
|
define_method(function) do |other, **options|
|
99
131
|
datum = exec_func_binary(function, other, options)
|
@@ -117,13 +149,6 @@ module RedAmber
|
|
117
149
|
end
|
118
150
|
end
|
119
151
|
|
120
|
-
# NaN support needed
|
121
|
-
# - logb
|
122
|
-
|
123
|
-
# Functions with numerical range check
|
124
|
-
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
125
|
-
# shift_left_checked shift_right_checked
|
126
|
-
|
127
152
|
# [Binary element-wise with operator]: vector.func(other) => vector
|
128
153
|
binary_element_wise_op = {
|
129
154
|
add: '+',
|
@@ -171,6 +196,23 @@ module RedAmber
|
|
171
196
|
[Vector.new(Array(other) * size), self]
|
172
197
|
end
|
173
198
|
|
199
|
+
# < Not implimented yet > ---
|
200
|
+
|
201
|
+
# option(s) required
|
202
|
+
# - index
|
203
|
+
|
204
|
+
# Returns other than value
|
205
|
+
# - mode
|
206
|
+
# - tdigest
|
207
|
+
|
208
|
+
# Functions with numerical range check (unary)
|
209
|
+
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
210
|
+
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
211
|
+
|
212
|
+
# Functions with numerical range check (binary)
|
213
|
+
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
214
|
+
# shift_left_checked shift_right_checked
|
215
|
+
|
174
216
|
# (array functions)
|
175
217
|
# dictionary_encode,
|
176
218
|
# partition_nth_indices,
|
@@ -64,6 +64,8 @@ module RedAmber
|
|
64
64
|
return filter_by_array(arg)
|
65
65
|
when Arrow::Array
|
66
66
|
array = arg
|
67
|
+
when Range
|
68
|
+
array = normalize_element(arg)
|
67
69
|
else
|
68
70
|
unless arg.is_a?(Numeric) || booleans?([arg])
|
69
71
|
raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
|
@@ -80,16 +82,17 @@ module RedAmber
|
|
80
82
|
|
81
83
|
# @param values [Array, Arrow::Array, Vector]
|
82
84
|
def is_in(*values)
|
83
|
-
|
85
|
+
self_data = chunked? ? data.pack : data
|
86
|
+
|
84
87
|
array =
|
85
|
-
case values
|
86
|
-
|
87
|
-
values[0].
|
88
|
-
|
89
|
-
values
|
88
|
+
case values
|
89
|
+
in [Vector] | [Arrow::Array] | [Arrow::ChunkedArray]
|
90
|
+
values[0].to_a
|
91
|
+
else
|
92
|
+
Array(values).flatten
|
90
93
|
end
|
91
|
-
|
92
|
-
Vector.new(
|
94
|
+
|
95
|
+
Vector.new(self_data.is_in(array))
|
93
96
|
end
|
94
97
|
|
95
98
|
# Arrow's support required
|
@@ -109,7 +112,7 @@ module RedAmber
|
|
109
112
|
|
110
113
|
index_array = Arrow::UInt64ArrayBuilder.build(normalized_indices.data) # round to integer array
|
111
114
|
|
112
|
-
datum = find(:
|
115
|
+
datum = find(:take).execute([data, index_array]) # :array_take will fail with ChunkedArray
|
113
116
|
Vector.new(datum.value)
|
114
117
|
end
|
115
118
|
|
@@ -12,7 +12,15 @@ module RedAmber
|
|
12
12
|
# @param replacer [Array, Vector, Arrow::Array] new data to replace for.
|
13
13
|
# @return [Vector] Replaced new Vector
|
14
14
|
def replace(args, replacer)
|
15
|
-
args =
|
15
|
+
args =
|
16
|
+
case args
|
17
|
+
when Array
|
18
|
+
args
|
19
|
+
when Range
|
20
|
+
normalize_element(args)
|
21
|
+
else
|
22
|
+
Array(args)
|
23
|
+
end
|
16
24
|
replacer = Array(replacer)
|
17
25
|
return self if args.empty? || args[0].nil?
|
18
26
|
|
@@ -22,6 +30,7 @@ module RedAmber
|
|
22
30
|
if vector.boolean?
|
23
31
|
vector
|
24
32
|
elsif vector.numeric?
|
33
|
+
replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
|
25
34
|
Vector.new(indices).is_in(vector)
|
26
35
|
else
|
27
36
|
raise VectorArgumentError, "Invalid data type #{args}"
|
@@ -50,6 +59,18 @@ module RedAmber
|
|
50
59
|
is_nil.if_else(false, self).invert
|
51
60
|
end
|
52
61
|
|
62
|
+
def shift(amount = 1, fill: nil)
|
63
|
+
raise VectorArgumentError, 'Shift amount is too large' if amount.abs > size
|
64
|
+
|
65
|
+
if amount.positive?
|
66
|
+
replace(amount..-1, self[0...-amount]).replace(0...amount, fill)
|
67
|
+
elsif amount.negative?
|
68
|
+
replace(0...amount, self[-amount..]).replace(amount..-1, fill)
|
69
|
+
else # amount == 0
|
70
|
+
self
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
53
74
|
private
|
54
75
|
|
55
76
|
# [Ternary]: replace_with(booleans, replacements) => vector
|
data/lib/red_amber/version.rb
CHANGED
data/lib/red_amber.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'arrow'
|
4
|
-
require 'rover-df'
|
5
4
|
|
6
5
|
require_relative 'red_amber/helper'
|
7
6
|
require_relative 'red_amber/data_frame_displayable'
|
8
7
|
require_relative 'red_amber/data_frame_indexable'
|
8
|
+
require_relative 'red_amber/data_frame_reshaping'
|
9
9
|
require_relative 'red_amber/data_frame_selectable'
|
10
10
|
require_relative 'red_amber/data_frame_variable_operation'
|
11
11
|
require_relative 'red_amber/data_frame'
|
data/red_amber.gemspec
CHANGED
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
|
-
spec.add_dependency 'red-arrow', '>=
|
33
|
+
spec.add_dependency 'red-arrow', '>= 9.0.0'
|
34
34
|
|
35
35
|
# Development dependency has gone to the Gemfile (rubygems/bundler#7237)
|
36
36
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07
|
11
|
+
date: 2022-09-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 9.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 9.0.0
|
27
27
|
description: RedAmber is a simple dataframe library inspired by Rover-df and powered
|
28
28
|
by Red Arrow.
|
29
29
|
email:
|
@@ -34,6 +34,7 @@ extra_rdoc_files: []
|
|
34
34
|
files:
|
35
35
|
- ".rubocop.yml"
|
36
36
|
- ".rubocop_todo.yml"
|
37
|
+
- ".yardopts"
|
37
38
|
- CHANGELOG.md
|
38
39
|
- Gemfile
|
39
40
|
- LICENSE
|
@@ -41,10 +42,10 @@ files:
|
|
41
42
|
- Rakefile
|
42
43
|
- benchmark/csv_load_penguins.yml
|
43
44
|
- benchmark/drop_nil.yml
|
44
|
-
- doc/47_examples_of_red_amber.ipynb
|
45
45
|
- doc/CODE_OF_CONDUCT.md
|
46
46
|
- doc/DataFrame.md
|
47
47
|
- doc/Vector.md
|
48
|
+
- doc/examples_of_red_amber.ipynb
|
48
49
|
- doc/image/arrow_table_new.png
|
49
50
|
- doc/image/dataframe/assign.png
|
50
51
|
- doc/image/dataframe/drop.png
|
@@ -68,6 +69,7 @@ files:
|
|
68
69
|
- lib/red_amber/data_frame.rb
|
69
70
|
- lib/red_amber/data_frame_displayable.rb
|
70
71
|
- lib/red_amber/data_frame_indexable.rb
|
72
|
+
- lib/red_amber/data_frame_reshaping.rb
|
71
73
|
- lib/red_amber/data_frame_selectable.rb
|
72
74
|
- lib/red_amber/data_frame_variable_operation.rb
|
73
75
|
- lib/red_amber/group.rb
|