red_amber 0.1.7 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +12 -2
- data/.rubocop_todo.yml +2 -15
- data/.yardopts +1 -0
- data/CHANGELOG.md +164 -2
- data/Gemfile +2 -1
- data/README.md +246 -17
- data/doc/DataFrame.md +392 -129
- data/doc/Vector.md +37 -19
- data/doc/examples_of_red_amber.ipynb +8979 -0
- data/lib/red_amber/data_frame.rb +138 -24
- data/lib/red_amber/data_frame_displayable.rb +35 -18
- data/lib/red_amber/data_frame_reshaping.rb +85 -0
- data/lib/red_amber/data_frame_selectable.rb +53 -9
- data/lib/red_amber/data_frame_variable_operation.rb +130 -50
- data/lib/red_amber/group.rb +29 -27
- data/lib/red_amber/vector.rb +1 -1
- data/lib/red_amber/vector_functions.rb +65 -23
- data/lib/red_amber/vector_selectable.rb +12 -9
- data/lib/red_amber/vector_updatable.rb +22 -1
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -1
- data/red_amber.gemspec +1 -1
- metadata +7 -5
- data/doc/47_examples_of_red_amber.ipynb +0 -4872
@@ -9,12 +9,16 @@ module RedAmber
|
|
9
9
|
if block
|
10
10
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
11
11
|
|
12
|
-
picker = instance_eval(&block)
|
12
|
+
picker = [instance_eval(&block)]
|
13
13
|
end
|
14
|
-
picker
|
14
|
+
picker.flatten!
|
15
15
|
return DataFrame.new if picker.empty? || picker == [nil]
|
16
16
|
|
17
|
-
|
17
|
+
key_vector = Vector.new(keys)
|
18
|
+
picker_vector = parse_to_vector(picker)
|
19
|
+
|
20
|
+
picker = key_vector.filter(*picker_vector).to_a if picker_vector.boolean?
|
21
|
+
picker = key_vector.take(*picker_vector).to_a if picker_vector.numeric?
|
18
22
|
|
19
23
|
# DataFrame#[] creates a Vector when a single key is specified.
|
20
24
|
# DataFrame#pick creates a DataFrame with single key.
|
@@ -29,12 +33,22 @@ module RedAmber
|
|
29
33
|
if block
|
30
34
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
31
35
|
|
32
|
-
dropper = instance_eval(&block)
|
36
|
+
dropper = [instance_eval(&block)]
|
33
37
|
end
|
34
|
-
dropper
|
35
|
-
|
38
|
+
dropper.flatten!
|
39
|
+
|
40
|
+
key_vector = Vector.new(keys)
|
41
|
+
dropper_vector = parse_to_vector(dropper)
|
42
|
+
|
43
|
+
picker =
|
44
|
+
if dropper_vector.boolean?
|
45
|
+
key_vector.filter(*dropper_vector.primitive_invert).each.map(&:to_sym)
|
46
|
+
elsif dropper_vector.numeric?
|
47
|
+
keys - key_vector.take(*dropper_vector).each.map(&:to_sym)
|
48
|
+
else
|
49
|
+
keys - dropper
|
50
|
+
end
|
36
51
|
|
37
|
-
picker = keys - dropper
|
38
52
|
return DataFrame.new if picker.empty?
|
39
53
|
|
40
54
|
# DataFrame#[] creates a Vector when a single key is specified.
|
@@ -44,64 +58,118 @@ module RedAmber
|
|
44
58
|
raise DataFrameArgumentError, "Invalid argument #{args}"
|
45
59
|
end
|
46
60
|
|
47
|
-
# rename variables to create new DataFrame
|
48
|
-
def rename(*
|
49
|
-
renamer = args
|
61
|
+
# rename variables to create a new DataFrame
|
62
|
+
def rename(*renamer, &block)
|
50
63
|
if block
|
51
|
-
raise DataFrameArgumentError, 'Must not specify both arguments and a block' unless
|
64
|
+
raise DataFrameArgumentError, 'Must not specify both arguments and a block' unless renamer.empty?
|
52
65
|
|
53
|
-
renamer = instance_eval(&block)
|
66
|
+
renamer = [instance_eval(&block)]
|
67
|
+
end
|
68
|
+
case renamer
|
69
|
+
in [] | [nil] | [{}] | [[]]
|
70
|
+
return self
|
71
|
+
in [Hash => key_pairs]
|
72
|
+
# noop
|
73
|
+
in [ (Symbol | String) => from, (Symbol | String) => to]
|
74
|
+
key_pairs = { from => to }
|
75
|
+
in [Array => array_in_array]
|
76
|
+
key_pairs = try_convert_to_hash(array_in_array)
|
77
|
+
in [Array, *] => array_in_array1
|
78
|
+
key_pairs = try_convert_to_hash(array_in_array1)
|
79
|
+
else
|
80
|
+
raise DataFrameArgumentError, "Invalid argument #{renamer}"
|
54
81
|
end
|
55
|
-
|
56
|
-
|
82
|
+
rename_by_hash(key_pairs)
|
83
|
+
end
|
57
84
|
|
58
|
-
|
59
|
-
|
85
|
+
# assign variables to create a new DataFrame
|
86
|
+
def assign(*assigner, &block)
|
87
|
+
appender, fields, arrays = assign_update(*assigner, &block)
|
88
|
+
return self if appender.is_a?(DataFrame)
|
60
89
|
|
61
|
-
|
90
|
+
append_to_fields_and_arrays(appender, fields, arrays, append_to_left: false) unless appender.empty?
|
91
|
+
|
92
|
+
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
62
93
|
end
|
63
94
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
95
|
+
def assign_left(*assigner, &block)
|
96
|
+
appender, fields, arrays = assign_update(*assigner, &block)
|
97
|
+
return self if appender.is_a?(DataFrame)
|
98
|
+
|
99
|
+
append_to_fields_and_arrays(appender, fields, arrays, append_to_left: true) unless appender.empty?
|
69
100
|
|
70
|
-
|
101
|
+
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
102
|
+
end
|
103
|
+
|
104
|
+
private
|
105
|
+
|
106
|
+
def assign_update(*assigner, &block)
|
107
|
+
if block
|
108
|
+
assigner_from_block = instance_eval(&block)
|
109
|
+
assigner =
|
110
|
+
if assigner.empty?
|
111
|
+
# block only
|
112
|
+
[assigner_from_block]
|
113
|
+
# If Ruby >= 3.0, one line pattern match can be used
|
114
|
+
# assigner_from_block in [Array, *]
|
115
|
+
elsif multiple_assigner?(assigner_from_block)
|
116
|
+
assigner.zip(assigner_from_block)
|
117
|
+
else
|
118
|
+
assigner.zip([assigner_from_block])
|
119
|
+
end
|
71
120
|
end
|
72
|
-
assigner = [assigner].flatten
|
73
|
-
return self if assigner.empty? || assigner == [nil]
|
74
121
|
|
75
|
-
|
122
|
+
case assigner
|
123
|
+
in [] | [nil] | [{}] | [[]]
|
124
|
+
return self
|
125
|
+
in [Hash => key_array_pairs]
|
126
|
+
# noop
|
127
|
+
in [(Symbol | String) => key, (Vector | Array | Arrow::Array) => array]
|
128
|
+
key_array_pairs = { key => array }
|
129
|
+
in [Array => array_in_array]
|
130
|
+
key_array_pairs = try_convert_to_hash(array_in_array)
|
131
|
+
in [Array, *] => array_in_array1
|
132
|
+
key_array_pairs = try_convert_to_hash(array_in_array1)
|
133
|
+
else
|
134
|
+
raise DataFrameArgumentError, "Invalid argument #{assigner}"
|
135
|
+
end
|
76
136
|
|
77
137
|
updater = {}
|
78
138
|
appender = {}
|
79
|
-
|
139
|
+
key_array_pairs.each do |key, array|
|
140
|
+
raise DataFrameArgumentError, "Empty column data: #{key} => nil" if array.nil?
|
141
|
+
|
80
142
|
if keys.include? key
|
81
|
-
updater[key] =
|
143
|
+
updater[key] = array
|
82
144
|
else
|
83
|
-
appender[key] =
|
145
|
+
appender[key] = array
|
84
146
|
end
|
85
147
|
end
|
86
|
-
|
87
|
-
append_to_fields_and_arrays(appender, fields, arrays) unless appender.empty?
|
88
|
-
|
89
|
-
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
148
|
+
[appender, *update_fields_and_arrays(updater)]
|
90
149
|
end
|
91
150
|
|
92
|
-
|
151
|
+
def try_convert_to_hash(array)
|
152
|
+
array.to_h
|
153
|
+
rescue TypeError
|
154
|
+
[array].to_h
|
155
|
+
rescue TypeError # rubocop:disable Lint/DuplicateRescueException
|
156
|
+
raise DataFrameArgumentError, "Invalid argument in Array #{array}"
|
157
|
+
end
|
93
158
|
|
94
159
|
def rename_by_hash(key_pairs)
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
160
|
+
not_existing_keys = key_pairs.keys - keys
|
161
|
+
raise DataFrameArgumentError, "Not existing: #{not_existing_keys}" unless not_existing_keys.empty?
|
162
|
+
|
163
|
+
fields =
|
164
|
+
keys.map do |key|
|
165
|
+
new_key = key_pairs[key]
|
166
|
+
if new_key
|
167
|
+
Arrow::Field.new(new_key.to_sym, @table[key].data_type)
|
168
|
+
else
|
169
|
+
@table.schema[key]
|
170
|
+
end
|
101
171
|
end
|
102
|
-
|
103
|
-
schema = Arrow::Schema.new(fields)
|
104
|
-
DataFrame.new(Arrow::Table.new(schema, @table.columns))
|
172
|
+
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), @table.columns))
|
105
173
|
end
|
106
174
|
|
107
175
|
def update_fields_and_arrays(updater)
|
@@ -111,7 +179,7 @@ module RedAmber
|
|
111
179
|
data = updater[key]
|
112
180
|
next unless data
|
113
181
|
|
114
|
-
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
|
182
|
+
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.nil? || data.size != size
|
115
183
|
|
116
184
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
117
185
|
fields[i] = Arrow::Field.new(key, a.value_data_type)
|
@@ -120,18 +188,30 @@ module RedAmber
|
|
120
188
|
[fields, arrays]
|
121
189
|
end
|
122
190
|
|
123
|
-
def append_to_fields_and_arrays(appender, fields, arrays)
|
124
|
-
appender.
|
191
|
+
def append_to_fields_and_arrays(appender, fields, arrays, append_to_left: false)
|
192
|
+
enum = append_to_left ? appender.reverse_each : appender.each
|
193
|
+
enum.each do |key, data|
|
125
194
|
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
|
126
195
|
|
127
196
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
128
|
-
|
129
|
-
|
197
|
+
|
198
|
+
if append_to_left
|
199
|
+
fields.unshift(Arrow::Field.new(key.to_sym, a.value_data_type))
|
200
|
+
arrays.unshift(Arrow::ChunkedArray.new([a]))
|
201
|
+
else
|
202
|
+
fields << Arrow::Field.new(key.to_sym, a.value_data_type)
|
203
|
+
arrays << Arrow::ChunkedArray.new([a])
|
204
|
+
end
|
130
205
|
end
|
131
206
|
end
|
132
207
|
|
133
|
-
def
|
134
|
-
|
208
|
+
def multiple_assigner?(assigner)
|
209
|
+
case assigner
|
210
|
+
in [Vector, *] | [Array, *] | [Arrow::Array, *]
|
211
|
+
true
|
212
|
+
else
|
213
|
+
false
|
214
|
+
end
|
135
215
|
end
|
136
216
|
end
|
137
217
|
end
|
data/lib/red_amber/group.rb
CHANGED
@@ -3,6 +3,10 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# group class
|
5
5
|
class Group
|
6
|
+
# Creates a new Group object.
|
7
|
+
#
|
8
|
+
# @param dataframe [DataFrame] dataframe to be grouped.
|
9
|
+
# @param group_keys [Array<>] keys for grouping.
|
6
10
|
def initialize(dataframe, *group_keys)
|
7
11
|
@dataframe = dataframe
|
8
12
|
@table = @dataframe.table
|
@@ -16,36 +20,30 @@ module RedAmber
|
|
16
20
|
@group = @table.group(*@group_keys)
|
17
21
|
end
|
18
22
|
|
19
|
-
|
20
|
-
|
23
|
+
functions = %i[count sum product mean min max stddev variance]
|
24
|
+
functions.each do |function|
|
25
|
+
define_method(function) do |*summary_keys|
|
26
|
+
by(function, summary_keys)
|
27
|
+
end
|
21
28
|
end
|
22
29
|
|
23
|
-
def
|
24
|
-
|
30
|
+
def inspect
|
31
|
+
tallys = @dataframe.pick(@group_keys).vectors.map.with_object({}) do |v, h|
|
32
|
+
h[v.key] = v.tally
|
33
|
+
end
|
34
|
+
"#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallys}>"
|
25
35
|
end
|
26
36
|
|
27
|
-
def
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
38
|
-
|
39
|
-
def max(*summary_keys)
|
40
|
-
by(:max, summary_keys)
|
41
|
-
end
|
42
|
-
|
43
|
-
def stddev(*summary_keys)
|
44
|
-
by(:stddev, summary_keys)
|
45
|
-
end
|
46
|
-
|
47
|
-
def variance(*summary_keys)
|
48
|
-
by(:variance, summary_keys)
|
37
|
+
def summarize(&block)
|
38
|
+
agg = instance_eval(&block)
|
39
|
+
case agg
|
40
|
+
when DataFrame
|
41
|
+
agg
|
42
|
+
when Array
|
43
|
+
agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
|
44
|
+
else
|
45
|
+
raise GroupArgumentError, "Unknown argument: #{agg}"
|
46
|
+
end
|
49
47
|
end
|
50
48
|
|
51
49
|
private
|
@@ -55,7 +53,11 @@ module RedAmber
|
|
55
53
|
d = summary_keys - @dataframe.keys
|
56
54
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
|
57
55
|
|
58
|
-
RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
56
|
+
df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
|
57
|
+
df = df[@group_keys, df.keys - @group_keys]
|
58
|
+
# if counts are the same (no nil included), aggregate count columns.
|
59
|
+
df = df[df.keys[0..1]].rename(df.keys[1], :count) if func == :count && df.to_h.values[1..].uniq.size == 1
|
60
|
+
df
|
59
61
|
end
|
60
62
|
end
|
61
63
|
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -34,18 +34,56 @@ module RedAmber
|
|
34
34
|
end
|
35
35
|
alias_method :std, :sd
|
36
36
|
|
37
|
-
#
|
38
|
-
#
|
37
|
+
# Return quantile
|
38
|
+
# 0.5 quantile (median) is returned by default.
|
39
|
+
# Or return quantile for specified probability (prob).
|
40
|
+
# If quantile lies between two data points, interpolated value is
|
41
|
+
# returned based on selected interpolation method.
|
42
|
+
# Nils and NaNs are ignored.
|
43
|
+
# Nil is returned if there is no valid data point.
|
44
|
+
#
|
45
|
+
# @param prob [Float] probability.
|
46
|
+
# @param interpolation [Symbol] specifies interpolation method to use,
|
47
|
+
# when the quantile lies between the data i and j.
|
48
|
+
# - Default value is :linear, which returns i + (j - i) * fraction.
|
49
|
+
# - :lower returns i.
|
50
|
+
# - :higher returns j.
|
51
|
+
# - :nearest returns i or j, whichever is closer.
|
52
|
+
# - :midpoint returns (i + j) / 2.
|
53
|
+
# @param skip_nils [Boolean] whether to ignore nil.
|
54
|
+
# @param min_count [Integer] min count.
|
55
|
+
# @return [Float] quantile.
|
56
|
+
def quantile(prob = 0.5, interpolation: :linear, skip_nils: true, min_count: 0)
|
57
|
+
raise VectorArgumentError, "Invalid: probability #{prob} must be between 0 and 1" unless (0..1).cover? prob
|
58
|
+
|
59
|
+
datum = find(:quantile).execute([data],
|
60
|
+
q: prob,
|
61
|
+
interpolation: interpolation,
|
62
|
+
skip_nulls: skip_nils,
|
63
|
+
min_count: min_count)
|
64
|
+
datum.value.to_a.first
|
65
|
+
end
|
39
66
|
|
40
|
-
#
|
41
|
-
#
|
42
|
-
|
43
|
-
|
67
|
+
# Return quantiles in a DataFrame
|
68
|
+
#
|
69
|
+
def quantiles(probs = [1.0, 0.75, 0.5, 0.25, 0.0], interpolation: :linear, skip_nils: true, min_count: 0)
|
70
|
+
if probs.empty? || !probs.all? { |q| (0..1).cover?(q) }
|
71
|
+
raise VectorArgumentError, "Invarid probavilities #{probs}"
|
72
|
+
end
|
73
|
+
|
74
|
+
DataFrame.new(
|
75
|
+
probs: probs,
|
76
|
+
quantiles: probs.map do |q|
|
77
|
+
quantile(q, interpolation: interpolation, skip_nils: skip_nils, min_count: min_count)
|
78
|
+
end
|
79
|
+
)
|
80
|
+
end
|
44
81
|
|
45
82
|
# [Unary element-wise]: vector.func => vector
|
46
83
|
unary_element_wise =
|
47
|
-
%i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward
|
48
|
-
is_inf is_nan is_null is_valid
|
84
|
+
%i[abs acos asin array_sort_indices atan bit_wise_not ceil cos fill_null_backward \
|
85
|
+
fill_null_forward floor is_finite is_inf is_nan is_null is_valid ln log10 log1p log2 \
|
86
|
+
round round_to_multiple sign sin tan trunc unique]
|
49
87
|
unary_element_wise.each do |function|
|
50
88
|
define_method(function) do |**options|
|
51
89
|
datum = exec_func_unary(function, options)
|
@@ -63,6 +101,7 @@ module RedAmber
|
|
63
101
|
|
64
102
|
alias_method :sort_indexes, :array_sort_indices
|
65
103
|
alias_method :sort_indices, :array_sort_indices
|
104
|
+
alias_method :sort_index, :array_sort_indices
|
66
105
|
|
67
106
|
alias_method :uniq, :unique
|
68
107
|
|
@@ -84,16 +123,9 @@ module RedAmber
|
|
84
123
|
end
|
85
124
|
alias_method :not, :invert
|
86
125
|
|
87
|
-
# NaN support needed
|
88
|
-
# - acos asin ln log10 log1p log2
|
89
|
-
|
90
|
-
# Functions with numerical range check
|
91
|
-
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
92
|
-
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
93
|
-
|
94
126
|
# [Binary element-wise]: vector.func(other) => vector
|
95
127
|
binary_element_wise =
|
96
|
-
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
|
128
|
+
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
|
97
129
|
binary_element_wise.each do |function|
|
98
130
|
define_method(function) do |other, **options|
|
99
131
|
datum = exec_func_binary(function, other, options)
|
@@ -117,13 +149,6 @@ module RedAmber
|
|
117
149
|
end
|
118
150
|
end
|
119
151
|
|
120
|
-
# NaN support needed
|
121
|
-
# - logb
|
122
|
-
|
123
|
-
# Functions with numerical range check
|
124
|
-
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
125
|
-
# shift_left_checked shift_right_checked
|
126
|
-
|
127
152
|
# [Binary element-wise with operator]: vector.func(other) => vector
|
128
153
|
binary_element_wise_op = {
|
129
154
|
add: '+',
|
@@ -171,6 +196,23 @@ module RedAmber
|
|
171
196
|
[Vector.new(Array(other) * size), self]
|
172
197
|
end
|
173
198
|
|
199
|
+
# < Not implemented yet > ---
|
200
|
+
|
201
|
+
# option(s) required
|
202
|
+
# - index
|
203
|
+
|
204
|
+
# Returns other than value
|
205
|
+
# - mode
|
206
|
+
# - tdigest
|
207
|
+
|
208
|
+
# Functions with numerical range check (unary)
|
209
|
+
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
210
|
+
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
211
|
+
|
212
|
+
# Functions with numerical range check (binary)
|
213
|
+
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
214
|
+
# shift_left_checked shift_right_checked
|
215
|
+
|
174
216
|
# (array functions)
|
175
217
|
# dictionary_encode,
|
176
218
|
# partition_nth_indices,
|
@@ -64,6 +64,8 @@ module RedAmber
|
|
64
64
|
return filter_by_array(arg)
|
65
65
|
when Arrow::Array
|
66
66
|
array = arg
|
67
|
+
when Range
|
68
|
+
array = normalize_element(arg)
|
67
69
|
else
|
68
70
|
unless arg.is_a?(Numeric) || booleans?([arg])
|
69
71
|
raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
|
@@ -80,16 +82,17 @@ module RedAmber
|
|
80
82
|
|
81
83
|
# @param values [Array, Arrow::Array, Vector]
|
82
84
|
def is_in(*values)
|
83
|
-
|
85
|
+
self_data = chunked? ? data.pack : data
|
86
|
+
|
84
87
|
array =
|
85
|
-
case values
|
86
|
-
|
87
|
-
values[0].
|
88
|
-
|
89
|
-
values
|
88
|
+
case values
|
89
|
+
in [Vector] | [Arrow::Array] | [Arrow::ChunkedArray]
|
90
|
+
values[0].to_a
|
91
|
+
else
|
92
|
+
Array(values).flatten
|
90
93
|
end
|
91
|
-
|
92
|
-
Vector.new(
|
94
|
+
|
95
|
+
Vector.new(self_data.is_in(array))
|
93
96
|
end
|
94
97
|
|
95
98
|
# Arrow's support required
|
@@ -109,7 +112,7 @@ module RedAmber
|
|
109
112
|
|
110
113
|
index_array = Arrow::UInt64ArrayBuilder.build(normalized_indices.data) # round to integer array
|
111
114
|
|
112
|
-
datum = find(:
|
115
|
+
datum = find(:take).execute([data, index_array]) # :array_take will fail with ChunkedArray
|
113
116
|
Vector.new(datum.value)
|
114
117
|
end
|
115
118
|
|
@@ -12,7 +12,15 @@ module RedAmber
|
|
12
12
|
# @param replacer [Array, Vector, Arrow::Array] new data to replace with.
|
13
13
|
# @return [Vector] Replaced new Vector
|
14
14
|
def replace(args, replacer)
|
15
|
-
args =
|
15
|
+
args =
|
16
|
+
case args
|
17
|
+
when Array
|
18
|
+
args
|
19
|
+
when Range
|
20
|
+
normalize_element(args)
|
21
|
+
else
|
22
|
+
Array(args)
|
23
|
+
end
|
16
24
|
replacer = Array(replacer)
|
17
25
|
return self if args.empty? || args[0].nil?
|
18
26
|
|
@@ -22,6 +30,7 @@ module RedAmber
|
|
22
30
|
if vector.boolean?
|
23
31
|
vector
|
24
32
|
elsif vector.numeric?
|
33
|
+
replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
|
25
34
|
Vector.new(indices).is_in(vector)
|
26
35
|
else
|
27
36
|
raise VectorArgumentError, "Invalid data type #{args}"
|
@@ -50,6 +59,18 @@ module RedAmber
|
|
50
59
|
is_nil.if_else(false, self).invert
|
51
60
|
end
|
52
61
|
|
62
|
+
def shift(amount = 1, fill: nil)
|
63
|
+
raise VectorArgumentError, 'Shift amount is too large' if amount.abs > size
|
64
|
+
|
65
|
+
if amount.positive?
|
66
|
+
replace(amount..-1, self[0...-amount]).replace(0...amount, fill)
|
67
|
+
elsif amount.negative?
|
68
|
+
replace(0...amount, self[-amount..]).replace(amount..-1, fill)
|
69
|
+
else # amount == 0
|
70
|
+
self
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
53
74
|
private
|
54
75
|
|
55
76
|
# [Ternary]: replace_with(booleans, replacements) => vector
|
data/lib/red_amber/version.rb
CHANGED
data/lib/red_amber.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'arrow'
|
4
|
-
require 'rover-df'
|
5
4
|
|
6
5
|
require_relative 'red_amber/helper'
|
7
6
|
require_relative 'red_amber/data_frame_displayable'
|
8
7
|
require_relative 'red_amber/data_frame_indexable'
|
8
|
+
require_relative 'red_amber/data_frame_reshaping'
|
9
9
|
require_relative 'red_amber/data_frame_selectable'
|
10
10
|
require_relative 'red_amber/data_frame_variable_operation'
|
11
11
|
require_relative 'red_amber/data_frame'
|
data/red_amber.gemspec
CHANGED
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
|
-
spec.add_dependency 'red-arrow', '>=
|
33
|
+
spec.add_dependency 'red-arrow', '>= 9.0.0'
|
34
34
|
|
35
35
|
# Development dependency has gone to the Gemfile (rubygems/bundler#7237)
|
36
36
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07
|
11
|
+
date: 2022-09-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 9.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 9.0.0
|
27
27
|
description: RedAmber is a simple dataframe library inspired by Rover-df and powered
|
28
28
|
by Red Arrow.
|
29
29
|
email:
|
@@ -34,6 +34,7 @@ extra_rdoc_files: []
|
|
34
34
|
files:
|
35
35
|
- ".rubocop.yml"
|
36
36
|
- ".rubocop_todo.yml"
|
37
|
+
- ".yardopts"
|
37
38
|
- CHANGELOG.md
|
38
39
|
- Gemfile
|
39
40
|
- LICENSE
|
@@ -41,10 +42,10 @@ files:
|
|
41
42
|
- Rakefile
|
42
43
|
- benchmark/csv_load_penguins.yml
|
43
44
|
- benchmark/drop_nil.yml
|
44
|
-
- doc/47_examples_of_red_amber.ipynb
|
45
45
|
- doc/CODE_OF_CONDUCT.md
|
46
46
|
- doc/DataFrame.md
|
47
47
|
- doc/Vector.md
|
48
|
+
- doc/examples_of_red_amber.ipynb
|
48
49
|
- doc/image/arrow_table_new.png
|
49
50
|
- doc/image/dataframe/assign.png
|
50
51
|
- doc/image/dataframe/drop.png
|
@@ -68,6 +69,7 @@ files:
|
|
68
69
|
- lib/red_amber/data_frame.rb
|
69
70
|
- lib/red_amber/data_frame_displayable.rb
|
70
71
|
- lib/red_amber/data_frame_indexable.rb
|
72
|
+
- lib/red_amber/data_frame_reshaping.rb
|
71
73
|
- lib/red_amber/data_frame_selectable.rb
|
72
74
|
- lib/red_amber/data_frame_variable_operation.rb
|
73
75
|
- lib/red_amber/group.rb
|