red_amber 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +58 -0
- data/README.md +38 -24
- data/doc/DataFrame.md +212 -80
- data/doc/Vector.md +7 -18
- data/doc/examples_of_red_amber.ipynb +2720 -524
- data/lib/red_amber/data_frame.rb +23 -4
- data/lib/red_amber/data_frame_displayable.rb +3 -3
- data/lib/red_amber/data_frame_reshaping.rb +10 -10
- data/lib/red_amber/data_frame_selectable.rb +53 -9
- data/lib/red_amber/data_frame_variable_operation.rb +44 -13
- data/lib/red_amber/vector.rb +1 -1
- data/lib/red_amber/vector_functions.rb +21 -24
- data/lib/red_amber/vector_selectable.rb +9 -8
- data/lib/red_amber/version.rb +1 -1
- metadata +2 -2
data/lib/red_amber/data_frame.rb
CHANGED
@@ -159,12 +159,19 @@ module RedAmber
|
|
159
159
|
@vectors || @vectors = init_instance_vars(:vectors)
|
160
160
|
end
|
161
161
|
|
162
|
-
# Returns row indices (
|
162
|
+
# Returns row indices (start...(size+start)) in an Array.
|
163
163
|
#
|
164
|
+
# @param start [Object]
|
165
|
+
# Object which has #succ method.
|
164
166
|
# @return [Array]
|
165
|
-
# An Array of
|
166
|
-
|
167
|
-
|
167
|
+
# An Array of indices of the row.
|
168
|
+
# @example
|
169
|
+
# (when self.size == 5)
|
170
|
+
# - indices #=> [0, 1, 2, 3, 4]
|
171
|
+
# - indices(1) #=> [1, 2, 3, 4, 5]
|
172
|
+
# - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
|
173
|
+
def indices(start = 0)
|
174
|
+
(start..).take(size)
|
168
175
|
end
|
169
176
|
alias_method :indexes, :indices
|
170
177
|
|
@@ -225,6 +232,18 @@ module RedAmber
|
|
225
232
|
g
|
226
233
|
end
|
227
234
|
|
235
|
+
def method_missing(name, *args, &block)
|
236
|
+
return v(name) if args.empty?
|
237
|
+
|
238
|
+
super
|
239
|
+
end
|
240
|
+
|
241
|
+
def respond_to_missing?(name, include_private)
|
242
|
+
return true if key?(name)
|
243
|
+
|
244
|
+
super
|
245
|
+
end
|
246
|
+
|
228
247
|
private
|
229
248
|
|
230
249
|
# initialize @variable, @keys, @vectors and return one of them
|
@@ -154,7 +154,7 @@ module RedAmber
|
|
154
154
|
|
155
155
|
def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
|
156
156
|
original = self
|
157
|
-
indices = size > head + tail ? [*0
|
157
|
+
indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
|
158
158
|
df = slice(indices).assign do
|
159
159
|
assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
|
160
160
|
vectors.each_with_object(assigner) do |v, a|
|
@@ -173,12 +173,12 @@ module RedAmber
|
|
173
173
|
end
|
174
174
|
|
175
175
|
df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
|
176
|
-
df = size > head + tail ? df[0, 0, 0
|
176
|
+
df = size > head + tail ? df[0, 0, 0..head, -tail..-1] : df[0, 0, 0..-1]
|
177
177
|
df = df.assign do
|
178
178
|
vectors.each_with_object({}) do |v, assigner|
|
179
179
|
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
180
180
|
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
181
|
-
assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
|
181
|
+
assigner[v.key] = original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
|
182
182
|
end
|
183
183
|
end
|
184
184
|
|
@@ -5,20 +5,20 @@ module RedAmber
|
|
5
5
|
module DataFrameReshaping
|
6
6
|
# Transpose a wide DataFrame.
|
7
7
|
#
|
8
|
-
# @param key [Symbol
|
8
|
+
# @param key [Symbol] key of the index column
|
9
9
|
# to transpose into keys.
|
10
|
-
# If it is
|
11
|
-
# @param new_key [Symbol
|
12
|
-
# If it is
|
10
|
+
# If it is not specified, keys[0] is used.
|
11
|
+
# @param name [Symbol] key name of transposed index column.
|
12
|
+
# If it is not specified, :N is used. If it already exists, :N1 or :N1.succ is used.
|
13
13
|
# @return [DataFrame] transposed DataFrame
|
14
|
-
def transpose(key: keys.first,
|
15
|
-
raise DataFrameArgumentError, "
|
14
|
+
def transpose(key: keys.first, name: :N)
|
15
|
+
raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
|
16
16
|
|
17
17
|
# Find unused name
|
18
18
|
new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
|
19
|
-
|
19
|
+
name = (:N1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
|
20
20
|
|
21
|
-
hash = {
|
21
|
+
hash = { name => (keys - [key]) }
|
22
22
|
i = keys.index(key)
|
23
23
|
each_row do |h|
|
24
24
|
k = h.values[i]
|
@@ -33,7 +33,7 @@ module RedAmber
|
|
33
33
|
# @param name [Symbol, String] key of the column which comes **from key names**.
|
34
34
|
# @param value [Symbol, String] key of the column which comes **from values**.
|
35
35
|
# @return [DataFrame] long DataFrame.
|
36
|
-
def to_long(*keep_keys, name: :
|
36
|
+
def to_long(*keep_keys, name: :N, value: :V)
|
37
37
|
not_included = keep_keys - keys
|
38
38
|
raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
|
39
39
|
|
@@ -63,7 +63,7 @@ module RedAmber
|
|
63
63
|
# @param name [Symbol, String] key of the column which will be expanded **to key names**.
|
64
64
|
# @param value [Symbol, String] key of the column which will be expanded **to values**.
|
65
65
|
# @return [DataFrame] wide DataFrame.
|
66
|
-
def to_wide(name: :
|
66
|
+
def to_wide(name: :N, value: :V)
|
67
67
|
name = name.to_sym
|
68
68
|
raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
|
69
69
|
|
@@ -3,8 +3,8 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# mix-in for the class DataFrame
|
5
5
|
module DataFrameSelectable
|
6
|
-
# select
|
7
|
-
# select
|
6
|
+
# select columns: [symbol] or [string]
|
7
|
+
# select rows: [array of index], [range]
|
8
8
|
def [](*args)
|
9
9
|
args.flatten!
|
10
10
|
raise DataFrameArgumentError, 'Empty dataframe' if empty?
|
@@ -22,17 +22,17 @@ module RedAmber
|
|
22
22
|
raise DataFrameArgumentError, "Invalid argument: #{args}"
|
23
23
|
end
|
24
24
|
|
25
|
-
# slice and select
|
25
|
+
# slice and select rows to create sub DataFrame
|
26
26
|
def slice(*args, &block)
|
27
27
|
slicer = args
|
28
28
|
if block
|
29
29
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
30
30
|
|
31
|
-
slicer = instance_eval(&block)
|
31
|
+
slicer = [instance_eval(&block)]
|
32
32
|
end
|
33
|
-
slicer
|
33
|
+
slicer.flatten!
|
34
34
|
|
35
|
-
raise DataFrameArgumentError, '
|
35
|
+
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
36
36
|
return remove_all_values if slicer.empty? || slicer[0].nil?
|
37
37
|
|
38
38
|
vector = parse_to_vector(slicer)
|
@@ -46,15 +46,59 @@ module RedAmber
|
|
46
46
|
raise DataFrameArgumentError, "Invalid argument #{slicer}"
|
47
47
|
end
|
48
48
|
|
49
|
-
|
49
|
+
def slice_by(key, keep_key: false, &block)
|
50
|
+
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
51
|
+
raise DataFrameArgumentError, 'No block given' unless block
|
52
|
+
raise DataFrameArgumentError, "#{key} is no a key of self" unless key?(key)
|
53
|
+
return self if key.nil?
|
54
|
+
|
55
|
+
slicer = instance_eval(&block)
|
56
|
+
return DataFrame.new unless slicer
|
57
|
+
|
58
|
+
if slicer.is_a?(Range)
|
59
|
+
from = slicer.begin
|
60
|
+
from =
|
61
|
+
if from.is_a?(String)
|
62
|
+
self[key].index(from)
|
63
|
+
elsif from.nil?
|
64
|
+
0
|
65
|
+
elsif from < 0
|
66
|
+
size + from
|
67
|
+
else
|
68
|
+
from
|
69
|
+
end
|
70
|
+
to = slicer.end
|
71
|
+
to =
|
72
|
+
if to.is_a?(String)
|
73
|
+
self[key].index(to)
|
74
|
+
elsif to.nil?
|
75
|
+
size - 1
|
76
|
+
elsif to < 0
|
77
|
+
size + to
|
78
|
+
else
|
79
|
+
to
|
80
|
+
end
|
81
|
+
slicer = (from..to).to_a
|
82
|
+
else
|
83
|
+
slicer = slicer.map { |x| x.is_a?(String) ? self[key].index(x) : x }
|
84
|
+
end
|
85
|
+
|
86
|
+
if keep_key
|
87
|
+
take(slicer)
|
88
|
+
else
|
89
|
+
take(slicer).drop(key)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# remove selected rows to create remainder DataFrame
|
50
94
|
def remove(*args, &block)
|
51
95
|
remover = args
|
52
96
|
if block
|
53
97
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
54
98
|
|
55
|
-
remover = instance_eval(&block)
|
99
|
+
remover = [instance_eval(&block)]
|
56
100
|
end
|
57
|
-
remover
|
101
|
+
remover.flatten!
|
58
102
|
|
59
103
|
raise DataFrameArgumentError, 'Empty dataframe' if empty?
|
60
104
|
return self if remover.empty? || remover[0].nil?
|
@@ -9,12 +9,16 @@ module RedAmber
|
|
9
9
|
if block
|
10
10
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
11
11
|
|
12
|
-
picker = instance_eval(&block)
|
12
|
+
picker = [instance_eval(&block)]
|
13
13
|
end
|
14
|
-
picker
|
14
|
+
picker.flatten!
|
15
15
|
return DataFrame.new if picker.empty? || picker == [nil]
|
16
16
|
|
17
|
-
|
17
|
+
key_vector = Vector.new(keys)
|
18
|
+
picker_vector = parse_to_vector(picker)
|
19
|
+
|
20
|
+
picker = key_vector.filter(*picker_vector).to_a if picker_vector.boolean?
|
21
|
+
picker = key_vector.take(*picker_vector).to_a if picker_vector.numeric?
|
18
22
|
|
19
23
|
# DataFrame#[] creates a Vector when a single key is specified.
|
20
24
|
# DataFrame#pick creates a DataFrame with single key.
|
@@ -29,12 +33,22 @@ module RedAmber
|
|
29
33
|
if block
|
30
34
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
31
35
|
|
32
|
-
dropper = instance_eval(&block)
|
36
|
+
dropper = [instance_eval(&block)]
|
33
37
|
end
|
34
|
-
dropper
|
35
|
-
|
38
|
+
dropper.flatten!
|
39
|
+
|
40
|
+
key_vector = Vector.new(keys)
|
41
|
+
dropper_vector = parse_to_vector(dropper)
|
42
|
+
|
43
|
+
picker =
|
44
|
+
if dropper_vector.boolean?
|
45
|
+
key_vector.filter(*dropper_vector.primitive_invert).each.map(&:to_sym)
|
46
|
+
elsif dropper_vector.numeric?
|
47
|
+
keys - key_vector.take(*dropper_vector).each.map(&:to_sym)
|
48
|
+
else
|
49
|
+
keys - dropper
|
50
|
+
end
|
36
51
|
|
37
|
-
picker = keys - dropper
|
38
52
|
return DataFrame.new if picker.empty?
|
39
53
|
|
40
54
|
# DataFrame#[] creates a Vector when a single key is specified.
|
@@ -91,10 +105,20 @@ module RedAmber
|
|
91
105
|
|
92
106
|
def assign_update(*assigner, &block)
|
93
107
|
if block
|
94
|
-
|
95
|
-
|
96
|
-
|
108
|
+
assigner_from_block = instance_eval(&block)
|
109
|
+
assigner =
|
110
|
+
if assigner.empty?
|
111
|
+
# block only
|
112
|
+
[assigner_from_block]
|
113
|
+
# If Ruby >= 3.0, one line pattern match can be used
|
114
|
+
# assigner_from_block in [Array, *]
|
115
|
+
elsif multiple_assigner?(assigner_from_block)
|
116
|
+
assigner.zip(assigner_from_block)
|
117
|
+
else
|
118
|
+
assigner.zip([assigner_from_block])
|
119
|
+
end
|
97
120
|
end
|
121
|
+
|
98
122
|
case assigner
|
99
123
|
in [] | [nil] | [{}] | [[]]
|
100
124
|
return self
|
@@ -113,6 +137,8 @@ module RedAmber
|
|
113
137
|
updater = {}
|
114
138
|
appender = {}
|
115
139
|
key_array_pairs.each do |key, array|
|
140
|
+
raise DataFrameArgumentError, "Empty column data: #{key} => nil" if array.nil?
|
141
|
+
|
116
142
|
if keys.include? key
|
117
143
|
updater[key] = array
|
118
144
|
else
|
@@ -153,7 +179,7 @@ module RedAmber
|
|
153
179
|
data = updater[key]
|
154
180
|
next unless data
|
155
181
|
|
156
|
-
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
|
182
|
+
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.nil? || data.size != size
|
157
183
|
|
158
184
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
159
185
|
fields[i] = Arrow::Field.new(key, a.value_data_type)
|
@@ -179,8 +205,13 @@ module RedAmber
|
|
179
205
|
end
|
180
206
|
end
|
181
207
|
|
182
|
-
def
|
183
|
-
|
208
|
+
def multiple_assigner?(assigner)
|
209
|
+
case assigner
|
210
|
+
in [Vector, *] | [Array, *] | [Arrow::Array, *]
|
211
|
+
true
|
212
|
+
else
|
213
|
+
false
|
214
|
+
end
|
184
215
|
end
|
185
216
|
end
|
186
217
|
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -34,13 +34,6 @@ module RedAmber
|
|
34
34
|
end
|
35
35
|
alias_method :std, :sd
|
36
36
|
|
37
|
-
# option(s) required
|
38
|
-
# - index
|
39
|
-
|
40
|
-
# Returns other than value
|
41
|
-
# - mode
|
42
|
-
# - tdigest
|
43
|
-
|
44
37
|
# Return quantile
|
45
38
|
# 0.5 quantile (median) is returned by default.
|
46
39
|
# Or return quantile for specified probability (prob).
|
@@ -88,8 +81,9 @@ module RedAmber
|
|
88
81
|
|
89
82
|
# [Unary element-wise]: vector.func => vector
|
90
83
|
unary_element_wise =
|
91
|
-
%i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward
|
92
|
-
is_inf is_nan is_null is_valid
|
84
|
+
%i[abs acos asin array_sort_indices atan bit_wise_not ceil cos fill_null_backward \
|
85
|
+
fill_null_forward floor is_finite is_inf is_nan is_null is_valid ln log10 log1p log2 \
|
86
|
+
round round_to_multiple sign sin tan trunc unique]
|
93
87
|
unary_element_wise.each do |function|
|
94
88
|
define_method(function) do |**options|
|
95
89
|
datum = exec_func_unary(function, options)
|
@@ -129,16 +123,9 @@ module RedAmber
|
|
129
123
|
end
|
130
124
|
alias_method :not, :invert
|
131
125
|
|
132
|
-
# NaN support needed
|
133
|
-
# - acos asin ln log10 log1p log2
|
134
|
-
|
135
|
-
# Functions with numerical range check
|
136
|
-
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
137
|
-
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
138
|
-
|
139
126
|
# [Binary element-wise]: vector.func(other) => vector
|
140
127
|
binary_element_wise =
|
141
|
-
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
|
128
|
+
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
|
142
129
|
binary_element_wise.each do |function|
|
143
130
|
define_method(function) do |other, **options|
|
144
131
|
datum = exec_func_binary(function, other, options)
|
@@ -162,13 +149,6 @@ module RedAmber
|
|
162
149
|
end
|
163
150
|
end
|
164
151
|
|
165
|
-
# NaN support needed
|
166
|
-
# - logb
|
167
|
-
|
168
|
-
# Functions with numerical range check
|
169
|
-
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
170
|
-
# shift_left_checked shift_right_checked
|
171
|
-
|
172
152
|
# [Binary element-wise with operator]: vector.func(other) => vector
|
173
153
|
binary_element_wise_op = {
|
174
154
|
add: '+',
|
@@ -216,6 +196,23 @@ module RedAmber
|
|
216
196
|
[Vector.new(Array(other) * size), self]
|
217
197
|
end
|
218
198
|
|
199
|
+
# < Not implemented yet > ---
|
200
|
+
|
201
|
+
# option(s) required
|
202
|
+
# - index
|
203
|
+
|
204
|
+
# Returns other than value
|
205
|
+
# - mode
|
206
|
+
# - tdigest
|
207
|
+
|
208
|
+
# Functions with numerical range check (unary)
|
209
|
+
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
210
|
+
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
211
|
+
|
212
|
+
# Functions with numerical range check (binary)
|
213
|
+
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
214
|
+
# shift_left_checked shift_right_checked
|
215
|
+
|
219
216
|
# (array functions)
|
220
217
|
# dictionary_encode,
|
221
218
|
# partition_nth_indices,
|
@@ -82,16 +82,17 @@ module RedAmber
|
|
82
82
|
|
83
83
|
# @param values [Array, Arrow::Array, Vector]
|
84
84
|
def is_in(*values)
|
85
|
-
|
85
|
+
self_data = chunked? ? data.pack : data
|
86
|
+
|
86
87
|
array =
|
87
|
-
case values
|
88
|
-
|
89
|
-
values[0].
|
90
|
-
|
91
|
-
values
|
88
|
+
case values
|
89
|
+
in [Vector] | [Arrow::Array] | [Arrow::ChunkedArray]
|
90
|
+
values[0].to_a
|
91
|
+
else
|
92
|
+
Array(values).flatten
|
92
93
|
end
|
93
|
-
|
94
|
-
Vector.new(
|
94
|
+
|
95
|
+
Vector.new(self_data.is_in(array))
|
95
96
|
end
|
96
97
|
|
97
98
|
# Arrow's support required
|
data/lib/red_amber/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|