red_amber 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +58 -0
- data/README.md +38 -24
- data/doc/DataFrame.md +212 -80
- data/doc/Vector.md +7 -18
- data/doc/examples_of_red_amber.ipynb +2720 -524
- data/lib/red_amber/data_frame.rb +23 -4
- data/lib/red_amber/data_frame_displayable.rb +3 -3
- data/lib/red_amber/data_frame_reshaping.rb +10 -10
- data/lib/red_amber/data_frame_selectable.rb +53 -9
- data/lib/red_amber/data_frame_variable_operation.rb +44 -13
- data/lib/red_amber/vector.rb +1 -1
- data/lib/red_amber/vector_functions.rb +21 -24
- data/lib/red_amber/vector_selectable.rb +9 -8
- data/lib/red_amber/version.rb +1 -1
- metadata +2 -2
data/lib/red_amber/data_frame.rb
CHANGED
@@ -159,12 +159,19 @@ module RedAmber
|
|
159
159
|
@vectors || @vectors = init_instance_vars(:vectors)
|
160
160
|
end
|
161
161
|
|
162
|
-
# Returns row indices (
|
162
|
+
# Returns row indices (start...(size+start)) in an Array.
|
163
163
|
#
|
164
|
+
# @param start [Object]
|
165
|
+
# Object which has #succ method.
|
164
166
|
# @return [Array]
|
165
|
-
# An Array of
|
166
|
-
|
167
|
-
|
167
|
+
# An Array of indices of the row.
|
168
|
+
# @example
|
169
|
+
# (when self.size == 5)
|
170
|
+
# - indices #=> [0, 1, 2, 3, 4]
|
171
|
+
# - indices(1) #=> [1, 2, 3, 4, 5]
|
172
|
+
# - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
|
173
|
+
def indices(start = 0)
|
174
|
+
(start..).take(size)
|
168
175
|
end
|
169
176
|
alias_method :indexes, :indices
|
170
177
|
|
@@ -225,6 +232,18 @@ module RedAmber
|
|
225
232
|
g
|
226
233
|
end
|
227
234
|
|
235
|
+
def method_missing(name, *args, &block)
|
236
|
+
return v(name) if args.empty?
|
237
|
+
|
238
|
+
super
|
239
|
+
end
|
240
|
+
|
241
|
+
def respond_to_missing?(name, include_private)
|
242
|
+
return true if key?(name)
|
243
|
+
|
244
|
+
super
|
245
|
+
end
|
246
|
+
|
228
247
|
private
|
229
248
|
|
230
249
|
# initialize @variable, @keys, @vectors and return one of them
|
@@ -154,7 +154,7 @@ module RedAmber
|
|
154
154
|
|
155
155
|
def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
|
156
156
|
original = self
|
157
|
-
indices = size > head + tail ? [*0
|
157
|
+
indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
|
158
158
|
df = slice(indices).assign do
|
159
159
|
assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
|
160
160
|
vectors.each_with_object(assigner) do |v, a|
|
@@ -173,12 +173,12 @@ module RedAmber
|
|
173
173
|
end
|
174
174
|
|
175
175
|
df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
|
176
|
-
df = size > head + tail ? df[0, 0, 0
|
176
|
+
df = size > head + tail ? df[0, 0, 0..head, -tail..-1] : df[0, 0, 0..-1]
|
177
177
|
df = df.assign do
|
178
178
|
vectors.each_with_object({}) do |v, assigner|
|
179
179
|
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
180
180
|
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
181
|
-
assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
|
181
|
+
assigner[v.key] = original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
|
182
182
|
end
|
183
183
|
end
|
184
184
|
|
@@ -5,20 +5,20 @@ module RedAmber
|
|
5
5
|
module DataFrameReshaping
|
6
6
|
# Transpose a wide DataFrame.
|
7
7
|
#
|
8
|
-
# @param key [Symbol
|
8
|
+
# @param key [Symbol] key of the index column
|
9
9
|
# to transpose into keys.
|
10
|
-
# If it is
|
11
|
-
# @param new_key [Symbol
|
12
|
-
# If it is
|
10
|
+
# If it is not specified, keys[0] is used.
|
11
|
+
# @param name [Symbol] key name of transposed index column.
|
12
|
+
# If it is not specified, :N is used. If it already exists, :N1 or :N1.succ is used.
|
13
13
|
# @return [DataFrame] transposed DataFrame
|
14
|
-
def transpose(key: keys.first,
|
15
|
-
raise DataFrameArgumentError, "
|
14
|
+
def transpose(key: keys.first, name: :N)
|
15
|
+
raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
|
16
16
|
|
17
17
|
# Find unused name
|
18
18
|
new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
|
19
|
-
|
19
|
+
name = (:N1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
|
20
20
|
|
21
|
-
hash = {
|
21
|
+
hash = { name => (keys - [key]) }
|
22
22
|
i = keys.index(key)
|
23
23
|
each_row do |h|
|
24
24
|
k = h.values[i]
|
@@ -33,7 +33,7 @@ module RedAmber
|
|
33
33
|
# @param name [Symbol, String] key of the column which comes **from key names**.
|
34
34
|
# @param value [Symbol, String] key of the column which comes **from values**.
|
35
35
|
# @return [DataFrame] long DataFrame.
|
36
|
-
def to_long(*keep_keys, name: :
|
36
|
+
def to_long(*keep_keys, name: :N, value: :V)
|
37
37
|
not_included = keep_keys - keys
|
38
38
|
raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
|
39
39
|
|
@@ -63,7 +63,7 @@ module RedAmber
|
|
63
63
|
# @param name [Symbol, String] key of the column which will be expanded **to key names**.
|
64
64
|
# @param value [Symbol, String] key of the column which will be expanded **to values**.
|
65
65
|
# @return [DataFrame] wide DataFrame.
|
66
|
-
def to_wide(name: :
|
66
|
+
def to_wide(name: :N, value: :V)
|
67
67
|
name = name.to_sym
|
68
68
|
raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
|
69
69
|
|
@@ -3,8 +3,8 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# mix-in for the class DataFrame
|
5
5
|
module DataFrameSelectable
|
6
|
-
# select
|
7
|
-
# select
|
6
|
+
# select columns: [symbol] or [string]
|
7
|
+
# select rows: [array of index], [range]
|
8
8
|
def [](*args)
|
9
9
|
args.flatten!
|
10
10
|
raise DataFrameArgumentError, 'Empty dataframe' if empty?
|
@@ -22,17 +22,17 @@ module RedAmber
|
|
22
22
|
raise DataFrameArgumentError, "Invalid argument: #{args}"
|
23
23
|
end
|
24
24
|
|
25
|
-
# slice and select
|
25
|
+
# slice and select rows to create sub DataFrame
|
26
26
|
def slice(*args, &block)
|
27
27
|
slicer = args
|
28
28
|
if block
|
29
29
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
30
30
|
|
31
|
-
slicer = instance_eval(&block)
|
31
|
+
slicer = [instance_eval(&block)]
|
32
32
|
end
|
33
|
-
slicer
|
33
|
+
slicer.flatten!
|
34
34
|
|
35
|
-
raise DataFrameArgumentError, '
|
35
|
+
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
36
36
|
return remove_all_values if slicer.empty? || slicer[0].nil?
|
37
37
|
|
38
38
|
vector = parse_to_vector(slicer)
|
@@ -46,15 +46,59 @@ module RedAmber
|
|
46
46
|
raise DataFrameArgumentError, "Invalid argument #{slicer}"
|
47
47
|
end
|
48
48
|
|
49
|
-
|
49
|
+
def slice_by(key, keep_key: false, &block)
|
50
|
+
raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
|
51
|
+
raise DataFrameArgumentError, 'No block given' unless block
|
52
|
+
raise DataFrameArgumentError, "#{key} is no a key of self" unless key?(key)
|
53
|
+
return self if key.nil?
|
54
|
+
|
55
|
+
slicer = instance_eval(&block)
|
56
|
+
return DataFrame.new unless slicer
|
57
|
+
|
58
|
+
if slicer.is_a?(Range)
|
59
|
+
from = slicer.begin
|
60
|
+
from =
|
61
|
+
if from.is_a?(String)
|
62
|
+
self[key].index(from)
|
63
|
+
elsif from.nil?
|
64
|
+
0
|
65
|
+
elsif from < 0
|
66
|
+
size + from
|
67
|
+
else
|
68
|
+
from
|
69
|
+
end
|
70
|
+
to = slicer.end
|
71
|
+
to =
|
72
|
+
if to.is_a?(String)
|
73
|
+
self[key].index(to)
|
74
|
+
elsif to.nil?
|
75
|
+
size - 1
|
76
|
+
elsif to < 0
|
77
|
+
size + to
|
78
|
+
else
|
79
|
+
to
|
80
|
+
end
|
81
|
+
slicer = (from..to).to_a
|
82
|
+
else
|
83
|
+
slicer = slicer.map { |x| x.is_a?(String) ? self[key].index(x) : x }
|
84
|
+
end
|
85
|
+
|
86
|
+
if keep_key
|
87
|
+
take(slicer)
|
88
|
+
else
|
89
|
+
take(slicer).drop(key)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# remove selected rows to create remainder DataFrame
|
50
94
|
def remove(*args, &block)
|
51
95
|
remover = args
|
52
96
|
if block
|
53
97
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
54
98
|
|
55
|
-
remover = instance_eval(&block)
|
99
|
+
remover = [instance_eval(&block)]
|
56
100
|
end
|
57
|
-
remover
|
101
|
+
remover.flatten!
|
58
102
|
|
59
103
|
raise DataFrameArgumentError, 'Empty dataframe' if empty?
|
60
104
|
return self if remover.empty? || remover[0].nil?
|
@@ -9,12 +9,16 @@ module RedAmber
|
|
9
9
|
if block
|
10
10
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
11
11
|
|
12
|
-
picker = instance_eval(&block)
|
12
|
+
picker = [instance_eval(&block)]
|
13
13
|
end
|
14
|
-
picker
|
14
|
+
picker.flatten!
|
15
15
|
return DataFrame.new if picker.empty? || picker == [nil]
|
16
16
|
|
17
|
-
|
17
|
+
key_vector = Vector.new(keys)
|
18
|
+
picker_vector = parse_to_vector(picker)
|
19
|
+
|
20
|
+
picker = key_vector.filter(*picker_vector).to_a if picker_vector.boolean?
|
21
|
+
picker = key_vector.take(*picker_vector).to_a if picker_vector.numeric?
|
18
22
|
|
19
23
|
# DataFrame#[] creates a Vector when a single key is specified.
|
20
24
|
# DataFrame#pick creates a DataFrame with single key.
|
@@ -29,12 +33,22 @@ module RedAmber
|
|
29
33
|
if block
|
30
34
|
raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
|
31
35
|
|
32
|
-
dropper = instance_eval(&block)
|
36
|
+
dropper = [instance_eval(&block)]
|
33
37
|
end
|
34
|
-
dropper
|
35
|
-
|
38
|
+
dropper.flatten!
|
39
|
+
|
40
|
+
key_vector = Vector.new(keys)
|
41
|
+
dropper_vector = parse_to_vector(dropper)
|
42
|
+
|
43
|
+
picker =
|
44
|
+
if dropper_vector.boolean?
|
45
|
+
key_vector.filter(*dropper_vector.primitive_invert).each.map(&:to_sym)
|
46
|
+
elsif dropper_vector.numeric?
|
47
|
+
keys - key_vector.take(*dropper_vector).each.map(&:to_sym)
|
48
|
+
else
|
49
|
+
keys - dropper
|
50
|
+
end
|
36
51
|
|
37
|
-
picker = keys - dropper
|
38
52
|
return DataFrame.new if picker.empty?
|
39
53
|
|
40
54
|
# DataFrame#[] creates a Vector when a single key is specified.
|
@@ -91,10 +105,20 @@ module RedAmber
|
|
91
105
|
|
92
106
|
def assign_update(*assigner, &block)
|
93
107
|
if block
|
94
|
-
|
95
|
-
|
96
|
-
|
108
|
+
assigner_from_block = instance_eval(&block)
|
109
|
+
assigner =
|
110
|
+
if assigner.empty?
|
111
|
+
# block only
|
112
|
+
[assigner_from_block]
|
113
|
+
# If Ruby >= 3.0, one line pattern match can be used
|
114
|
+
# assigner_from_block in [Array, *]
|
115
|
+
elsif multiple_assigner?(assigner_from_block)
|
116
|
+
assigner.zip(assigner_from_block)
|
117
|
+
else
|
118
|
+
assigner.zip([assigner_from_block])
|
119
|
+
end
|
97
120
|
end
|
121
|
+
|
98
122
|
case assigner
|
99
123
|
in [] | [nil] | [{}] | [[]]
|
100
124
|
return self
|
@@ -113,6 +137,8 @@ module RedAmber
|
|
113
137
|
updater = {}
|
114
138
|
appender = {}
|
115
139
|
key_array_pairs.each do |key, array|
|
140
|
+
raise DataFrameArgumentError, "Empty column data: #{key} => nil" if array.nil?
|
141
|
+
|
116
142
|
if keys.include? key
|
117
143
|
updater[key] = array
|
118
144
|
else
|
@@ -153,7 +179,7 @@ module RedAmber
|
|
153
179
|
data = updater[key]
|
154
180
|
next unless data
|
155
181
|
|
156
|
-
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
|
182
|
+
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.nil? || data.size != size
|
157
183
|
|
158
184
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
159
185
|
fields[i] = Arrow::Field.new(key, a.value_data_type)
|
@@ -179,8 +205,13 @@ module RedAmber
|
|
179
205
|
end
|
180
206
|
end
|
181
207
|
|
182
|
-
def
|
183
|
-
|
208
|
+
def multiple_assigner?(assigner)
|
209
|
+
case assigner
|
210
|
+
in [Vector, *] | [Array, *] | [Arrow::Array, *]
|
211
|
+
true
|
212
|
+
else
|
213
|
+
false
|
214
|
+
end
|
184
215
|
end
|
185
216
|
end
|
186
217
|
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -34,13 +34,6 @@ module RedAmber
|
|
34
34
|
end
|
35
35
|
alias_method :std, :sd
|
36
36
|
|
37
|
-
# option(s) required
|
38
|
-
# - index
|
39
|
-
|
40
|
-
# Returns other than value
|
41
|
-
# - mode
|
42
|
-
# - tdigest
|
43
|
-
|
44
37
|
# Return quantile
|
45
38
|
# 0.5 quantile (median) is returned by default.
|
46
39
|
# Or return quantile for specified probability (prob).
|
@@ -88,8 +81,9 @@ module RedAmber
|
|
88
81
|
|
89
82
|
# [Unary element-wise]: vector.func => vector
|
90
83
|
unary_element_wise =
|
91
|
-
%i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward
|
92
|
-
is_inf is_nan is_null is_valid
|
84
|
+
%i[abs acos asin array_sort_indices atan bit_wise_not ceil cos fill_null_backward \
|
85
|
+
fill_null_forward floor is_finite is_inf is_nan is_null is_valid ln log10 log1p log2 \
|
86
|
+
round round_to_multiple sign sin tan trunc unique]
|
93
87
|
unary_element_wise.each do |function|
|
94
88
|
define_method(function) do |**options|
|
95
89
|
datum = exec_func_unary(function, options)
|
@@ -129,16 +123,9 @@ module RedAmber
|
|
129
123
|
end
|
130
124
|
alias_method :not, :invert
|
131
125
|
|
132
|
-
# NaN support needed
|
133
|
-
# - acos asin ln log10 log1p log2
|
134
|
-
|
135
|
-
# Functions with numerical range check
|
136
|
-
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
137
|
-
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
138
|
-
|
139
126
|
# [Binary element-wise]: vector.func(other) => vector
|
140
127
|
binary_element_wise =
|
141
|
-
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
|
128
|
+
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
|
142
129
|
binary_element_wise.each do |function|
|
143
130
|
define_method(function) do |other, **options|
|
144
131
|
datum = exec_func_binary(function, other, options)
|
@@ -162,13 +149,6 @@ module RedAmber
|
|
162
149
|
end
|
163
150
|
end
|
164
151
|
|
165
|
-
# NaN support needed
|
166
|
-
# - logb
|
167
|
-
|
168
|
-
# Functions with numerical range check
|
169
|
-
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
170
|
-
# shift_left_checked shift_right_checked
|
171
|
-
|
172
152
|
# [Binary element-wise with operator]: vector.func(other) => vector
|
173
153
|
binary_element_wise_op = {
|
174
154
|
add: '+',
|
@@ -216,6 +196,23 @@ module RedAmber
|
|
216
196
|
[Vector.new(Array(other) * size), self]
|
217
197
|
end
|
218
198
|
|
199
|
+
# < Not implemented yet > ---
|
200
|
+
|
201
|
+
# option(s) required
|
202
|
+
# - index
|
203
|
+
|
204
|
+
# Returns other than value
|
205
|
+
# - mode
|
206
|
+
# - tdigest
|
207
|
+
|
208
|
+
# Functions with numerical range check (unary)
|
209
|
+
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
210
|
+
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
211
|
+
|
212
|
+
# Functions with numerical range check (binary)
|
213
|
+
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
214
|
+
# shift_left_checked shift_right_checked
|
215
|
+
|
219
216
|
# (array functions)
|
220
217
|
# dictionary_encode,
|
221
218
|
# partition_nth_indices,
|
@@ -82,16 +82,17 @@ module RedAmber
|
|
82
82
|
|
83
83
|
# @param values [Array, Arrow::Array, Vector]
|
84
84
|
def is_in(*values)
|
85
|
-
|
85
|
+
self_data = chunked? ? data.pack : data
|
86
|
+
|
86
87
|
array =
|
87
|
-
case values
|
88
|
-
|
89
|
-
values[0].
|
90
|
-
|
91
|
-
values
|
88
|
+
case values
|
89
|
+
in [Vector] | [Arrow::Array] | [Arrow::ChunkedArray]
|
90
|
+
values[0].to_a
|
91
|
+
else
|
92
|
+
Array(values).flatten
|
92
93
|
end
|
93
|
-
|
94
|
-
Vector.new(
|
94
|
+
|
95
|
+
Vector.new(self_data.is_in(array))
|
95
96
|
end
|
96
97
|
|
97
98
|
# Arrow's support required
|
data/lib/red_amber/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|