red_amber 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +15 -0
- data/CHANGELOG.md +170 -20
- data/Gemfile +4 -2
- data/README.md +121 -302
- data/benchmark/basic.yml +79 -0
- data/benchmark/combine.yml +63 -0
- data/benchmark/drop_nil.yml +15 -3
- data/benchmark/group.yml +33 -0
- data/benchmark/reshape.yml +27 -0
- data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
- data/benchmark/rover/flights.yml +23 -0
- data/benchmark/rover/penguins.yml +23 -0
- data/benchmark/rover/planes.yml +23 -0
- data/benchmark/rover/weather.yml +23 -0
- data/doc/DataFrame.md +611 -318
- data/doc/Vector.md +31 -36
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/join.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/set_and_bind.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +16 -42
- data/lib/red_amber/data_frame_combinable.rb +283 -0
- data/lib/red_amber/data_frame_displayable.rb +58 -3
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +8 -6
- data/lib/red_amber/data_frame_selectable.rb +9 -9
- data/lib/red_amber/data_frame_variable_operation.rb +27 -21
- data/lib/red_amber/group.rb +100 -17
- data/lib/red_amber/helper.rb +20 -30
- data/lib/red_amber/vector.rb +56 -30
- data/lib/red_amber/vector_functions.rb +0 -8
- data/lib/red_amber/vector_selectable.rb +9 -1
- data/lib/red_amber/vector_updatable.rb +61 -63
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +1 -1
- metadata +32 -11
- data/doc/examples_of_red_amber.ipynb +0 -8979
@@ -37,8 +37,12 @@ module RedAmber
|
|
37
37
|
alias_method :describe, :summary
|
38
38
|
|
39
39
|
def inspect
|
40
|
-
|
40
|
+
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
41
|
+
case mode.upcase
|
42
|
+
when 'TDR'
|
41
43
|
"#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
|
44
|
+
when 'MINIMUM'
|
45
|
+
shape_str
|
42
46
|
else
|
43
47
|
"#<#{shape_str(with_id: true)}>\n#{self}"
|
44
48
|
end
|
@@ -55,6 +59,23 @@ module RedAmber
|
|
55
59
|
"#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
|
56
60
|
end
|
57
61
|
|
62
|
+
def to_iruby
|
63
|
+
require 'iruby'
|
64
|
+
return ['text/plain', '(empty DataFrame)'] if empty?
|
65
|
+
|
66
|
+
mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
|
67
|
+
case mode.upcase
|
68
|
+
when 'PLAIN'
|
69
|
+
['text/plain', inspect]
|
70
|
+
when 'MINIMUM'
|
71
|
+
['text/plain', shape_str]
|
72
|
+
when 'TDR'
|
73
|
+
size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
|
74
|
+
else # 'TABLE'
|
75
|
+
['text/html', html_table]
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
58
79
|
private # =====
|
59
80
|
|
60
81
|
def shape_str(with_id: false)
|
@@ -98,7 +119,7 @@ module RedAmber
|
|
98
119
|
else
|
99
120
|
[shorthand(vector, size, max_element)]
|
100
121
|
end
|
101
|
-
sio.printf header_format, i
|
122
|
+
sio.printf header_format, i, key, type, data_tally.size, a.join(', ')
|
102
123
|
end
|
103
124
|
sio.string
|
104
125
|
end
|
@@ -153,10 +174,12 @@ module RedAmber
|
|
153
174
|
end
|
154
175
|
|
155
176
|
def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
|
177
|
+
return " #{keys.join(' ')}\n (Empty Vectors)\n" if size.zero?
|
178
|
+
|
156
179
|
original = self
|
157
180
|
indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
|
158
181
|
df = slice(indices).assign do
|
159
|
-
assigner = { INDEX_KEY => indices.map
|
182
|
+
assigner = { INDEX_KEY => indices.map(&:to_s) }
|
160
183
|
vectors.each_with_object(assigner) do |v, a|
|
161
184
|
a[v.key] = v.to_a.map do |e|
|
162
185
|
if e.nil?
|
@@ -220,5 +243,37 @@ module RedAmber
|
|
220
243
|
"%#{width}s"
|
221
244
|
end
|
222
245
|
end
|
246
|
+
|
247
|
+
def html_table
|
248
|
+
reduced = size > 8 ? self[0..4, -4..-1] : self
|
249
|
+
|
250
|
+
converted = reduced.assign do
|
251
|
+
vectors.select.with_object({}) do |vector, assigner|
|
252
|
+
assigner[vector.key] = vector.map do |element|
|
253
|
+
case element
|
254
|
+
in TrueClass
|
255
|
+
'<i>(true)</i>'
|
256
|
+
in FalseClass
|
257
|
+
'<i>(false)</i>'
|
258
|
+
in NilClass
|
259
|
+
'<i>(nil)</i>'
|
260
|
+
in ''
|
261
|
+
'""'
|
262
|
+
in String
|
263
|
+
element.sub(/^(\s+)$/, '"\1"') # blank spaces
|
264
|
+
in Float
|
265
|
+
format('%g', element)
|
266
|
+
in Integer
|
267
|
+
format('%d', element)
|
268
|
+
else
|
269
|
+
element
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|
273
|
+
end
|
274
|
+
|
275
|
+
html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
|
276
|
+
"#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
|
277
|
+
end
|
223
278
|
end
|
224
279
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# mix-ins for the class DataFrame
|
5
|
+
module DataFrameLoadSave
|
6
|
+
# Enable `self.load` as class method of DataFrame
|
7
|
+
def self.included(klass)
|
8
|
+
klass.extend ClassMethods
|
9
|
+
end
|
10
|
+
|
11
|
+
# Enable `self.load` as class method of DataFrame
|
12
|
+
module ClassMethods
|
13
|
+
# Load DataFrame via Arrow::Table.load
|
14
|
+
def load(path, options = {})
|
15
|
+
DataFrame.new(Arrow::Table.load(path, options))
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Save DataFrame
|
20
|
+
def save(output, options = {})
|
21
|
+
@table.save(output, options)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Save and reload to cast automatically
|
25
|
+
# Via tsv format file temporarily as default
|
26
|
+
#
|
27
|
+
# experimental feature
|
28
|
+
def auto_cast(format: :tsv)
|
29
|
+
return self if empty?
|
30
|
+
|
31
|
+
tempfile = Arrow::ResizableBuffer.new(1024)
|
32
|
+
save(tempfile, format: format)
|
33
|
+
DataFrame.load(tempfile, format: format)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -9,16 +9,17 @@ module RedAmber
|
|
9
9
|
# to transpose into keys.
|
10
10
|
# If it is not specified, keys[0] is used.
|
11
11
|
# @param new_key [Symbol] key name of transposed index column.
|
12
|
-
# If it is not specified, :
|
12
|
+
# If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
|
13
13
|
# @return [DataFrame] transposed DataFrame
|
14
|
-
def transpose(key: keys.first, name: :
|
14
|
+
def transpose(key: keys.first, name: :NAME)
|
15
15
|
raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
|
16
16
|
|
17
17
|
# Find unused name
|
18
18
|
new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
|
19
|
-
name = (:
|
19
|
+
name = (:NAME1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
|
20
20
|
|
21
|
-
|
21
|
+
names = (keys - [key]).map { |x| x&.to_s }
|
22
|
+
hash = { name => names }
|
22
23
|
i = keys.index(key)
|
23
24
|
each_row do |h|
|
24
25
|
k = h.values[i]
|
@@ -33,7 +34,7 @@ module RedAmber
|
|
33
34
|
# @param name [Symbol, String] key of the column which comes **from key names**.
|
34
35
|
# @param value [Symbol, String] key of the column which comes **from values**.
|
35
36
|
# @return [DataFrame] long DataFrame.
|
36
|
-
def to_long(*keep_keys, name: :
|
37
|
+
def to_long(*keep_keys, name: :NAME, value: :VALUE)
|
37
38
|
not_included = keep_keys - keys
|
38
39
|
raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
|
39
40
|
|
@@ -55,6 +56,7 @@ module RedAmber
|
|
55
56
|
end
|
56
57
|
end
|
57
58
|
end
|
59
|
+
hash[name] = hash[name].map { |x| x&.to_s }
|
58
60
|
DataFrame.new(hash)
|
59
61
|
end
|
60
62
|
|
@@ -63,7 +65,7 @@ module RedAmber
|
|
63
65
|
# @param name [Symbol, String] key of the column which will be expanded **to key names**.
|
64
66
|
# @param value [Symbol, String] key of the column which will be expanded **to values**.
|
65
67
|
# @return [DataFrame] wide DataFrame.
|
66
|
-
def to_wide(name: :
|
68
|
+
def to_wide(name: :NAME, value: :VALUE)
|
67
69
|
name = name.to_sym
|
68
70
|
raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
|
69
71
|
|
@@ -17,7 +17,7 @@ module RedAmber
|
|
17
17
|
raise DataFrameArgumentError, "Size is not match in booleans: #{args}"
|
18
18
|
end
|
19
19
|
return take_by_array(vector) if vector.numeric?
|
20
|
-
return select_vars_by_keys(vector.to_a.map(&:to_sym)) if vector.string? || vector.
|
20
|
+
return select_vars_by_keys(vector.to_a.map(&:to_sym)) if vector.string? || vector.dictionary?
|
21
21
|
|
22
22
|
raise DataFrameArgumentError, "Invalid argument: #{args}"
|
23
23
|
end
|
@@ -118,10 +118,10 @@ module RedAmber
|
|
118
118
|
end
|
119
119
|
|
120
120
|
normalized_indices = normalized_indices.floor.to_a.map(&:to_i) # round to integer array
|
121
|
-
return remove_all_values if normalized_indices == indices
|
121
|
+
return remove_all_values if normalized_indices == indices.to_a
|
122
122
|
return self if normalized_indices.empty?
|
123
123
|
|
124
|
-
index_array = indices - normalized_indices
|
124
|
+
index_array = indices.to_a - normalized_indices
|
125
125
|
|
126
126
|
datum = Arrow::Function.find(:take).execute([table, index_array])
|
127
127
|
return DataFrame.new(datum.value)
|
@@ -168,14 +168,14 @@ module RedAmber
|
|
168
168
|
|
169
169
|
# Undocumented
|
170
170
|
# TODO: support for option {boundscheck: true}
|
171
|
-
def take(*
|
172
|
-
|
173
|
-
return remove_all_values if
|
171
|
+
def take(*arg_indices)
|
172
|
+
arg_indices.flatten!
|
173
|
+
return remove_all_values if arg_indices.empty?
|
174
174
|
|
175
|
-
|
176
|
-
|
175
|
+
arg_indices = arg_indices[0] if arg_indices.one? && !arg_indices[0].is_a?(Numeric)
|
176
|
+
arg_indices = Vector.new(arg_indices) unless arg_indices.is_a?(Vector)
|
177
177
|
|
178
|
-
take_by_array(
|
178
|
+
take_by_array(arg_indices)
|
179
179
|
end
|
180
180
|
|
181
181
|
# Undocumented
|
@@ -15,16 +15,22 @@ module RedAmber
|
|
15
15
|
return DataFrame.new if picker.empty? || picker == [nil]
|
16
16
|
|
17
17
|
key_vector = Vector.new(keys)
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
vec = parse_to_vector(picker, vsize: n_keys)
|
19
|
+
|
20
|
+
ary =
|
21
|
+
if vec.boolean?
|
22
|
+
key_vector.filter(*vec).to_a
|
23
|
+
elsif vec.numeric?
|
24
|
+
key_vector.take(*vec).to_a
|
25
|
+
elsif vec.string? || vec.dictionary?
|
26
|
+
vec.to_a
|
27
|
+
else
|
28
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
29
|
+
end
|
22
30
|
|
23
|
-
# DataFrame#[] creates a Vector
|
31
|
+
# DataFrame#[] creates a Vector if single key is specified.
|
24
32
|
# DataFrame#pick creates a DataFrame with single key.
|
25
|
-
|
26
|
-
|
27
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
33
|
+
DataFrame.new(@table[ary])
|
28
34
|
end
|
29
35
|
|
30
36
|
# drop some variables to create remainder sub DataFrame
|
@@ -38,24 +44,24 @@ module RedAmber
|
|
38
44
|
dropper.flatten!
|
39
45
|
|
40
46
|
key_vector = Vector.new(keys)
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
if
|
45
|
-
key_vector.filter(*
|
46
|
-
elsif
|
47
|
-
keys - key_vector.take(*
|
47
|
+
vec = parse_to_vector(dropper, vsize: n_keys)
|
48
|
+
|
49
|
+
ary =
|
50
|
+
if vec.boolean?
|
51
|
+
key_vector.filter(*vec.primitive_invert).each.map(&:to_sym) # Array
|
52
|
+
elsif vec.numeric?
|
53
|
+
keys - key_vector.take(*vec).each.map(&:to_sym) # Array
|
54
|
+
elsif vec.string? || vec.dictionary?
|
55
|
+
keys - vec.to_a.map { _1&.to_sym } # Array
|
48
56
|
else
|
49
|
-
|
57
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
50
58
|
end
|
51
59
|
|
52
|
-
return DataFrame.new if
|
60
|
+
return DataFrame.new if ary.empty?
|
53
61
|
|
54
|
-
# DataFrame#[] creates a Vector
|
62
|
+
# DataFrame#[] creates a Vector if single key is specified.
|
55
63
|
# DataFrame#drop creates a DataFrame with single key.
|
56
|
-
|
57
|
-
|
58
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
64
|
+
DataFrame.new(@table[ary])
|
59
65
|
end
|
60
66
|
|
61
67
|
# rename variables to create a new DataFrame
|
data/lib/red_amber/group.rb
CHANGED
@@ -3,35 +3,84 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# group class
|
5
5
|
class Group
|
6
|
+
include Enumerable # This feature is experimental
|
7
|
+
|
6
8
|
# Creates a new Group object.
|
7
9
|
#
|
8
10
|
# @param dataframe [DataFrame] dataframe to be grouped.
|
9
11
|
# @param group_keys [Array<>] keys for grouping.
|
10
12
|
def initialize(dataframe, *group_keys)
|
11
13
|
@dataframe = dataframe
|
12
|
-
@table = @dataframe.table
|
13
14
|
@group_keys = group_keys.flatten
|
14
15
|
|
15
|
-
raise GroupArgumentError, 'group_keys
|
16
|
+
raise GroupArgumentError, 'group_keys are empty.' if @group_keys.empty?
|
16
17
|
|
17
18
|
d = @group_keys - @dataframe.keys
|
18
19
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty?
|
19
20
|
|
20
|
-
@
|
21
|
+
@filters = @group_counts = @base_table = nil
|
22
|
+
@group = @dataframe.table.group(*@group_keys)
|
21
23
|
end
|
22
24
|
|
25
|
+
attr_reader :dataframe, :group_keys
|
26
|
+
|
23
27
|
functions = %i[count sum product mean min max stddev variance]
|
24
28
|
functions.each do |function|
|
25
29
|
define_method(function) do |*summary_keys|
|
26
|
-
|
30
|
+
summary_keys = Array(summary_keys).flatten
|
31
|
+
d = summary_keys - @dataframe.keys
|
32
|
+
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
|
33
|
+
|
34
|
+
table = @group.aggregate(*build_aggregation_keys("hash_#{function}", summary_keys))
|
35
|
+
df = DataFrame.new(table)
|
36
|
+
df.pick(@group_keys, df.keys - @group_keys)
|
27
37
|
end
|
28
38
|
end
|
29
39
|
|
30
|
-
|
31
|
-
|
32
|
-
|
40
|
+
alias_method :__count, :count
|
41
|
+
private :__count
|
42
|
+
|
43
|
+
def count(*summary_keys)
|
44
|
+
df = __count(summary_keys)
|
45
|
+
# if counts are the same (and do not include NaN or nil), aggregate count columns.
|
46
|
+
if df.pick(@group_keys.size..).to_h.values.uniq.size == 1
|
47
|
+
df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
|
48
|
+
else
|
49
|
+
df
|
33
50
|
end
|
34
|
-
|
51
|
+
end
|
52
|
+
|
53
|
+
def filters
|
54
|
+
@filters ||= begin
|
55
|
+
first, *others = @group_keys.map do |key|
|
56
|
+
vector = @dataframe[key]
|
57
|
+
vector.uniq.each.map { |u| u.nil? ? vector.is_nil : vector == u }
|
58
|
+
end
|
59
|
+
|
60
|
+
if others.empty?
|
61
|
+
first.select(&:any?)
|
62
|
+
else
|
63
|
+
first.product(*others).map { |a| a.reduce(&:&) }.select(&:any?)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def each
|
69
|
+
filters
|
70
|
+
return enum_for(:each) unless block_given?
|
71
|
+
|
72
|
+
@filters.each do |filter|
|
73
|
+
yield @dataframe[filter]
|
74
|
+
end
|
75
|
+
@filters.size
|
76
|
+
end
|
77
|
+
|
78
|
+
def group_count
|
79
|
+
DataFrame.new(add_columns_to_table(base_table, [:group_count], [group_counts]))
|
80
|
+
end
|
81
|
+
|
82
|
+
def inspect
|
83
|
+
"#<#{self.class} : #{format('0x%016x', object_id)}>\n#{group_count}"
|
35
84
|
end
|
36
85
|
|
37
86
|
def summarize(&block)
|
@@ -48,16 +97,50 @@ module RedAmber
|
|
48
97
|
|
49
98
|
private
|
50
99
|
|
51
|
-
def
|
52
|
-
|
53
|
-
|
54
|
-
|
100
|
+
def build_aggregation_keys(function_name, summary_keys)
|
101
|
+
if summary_keys.empty?
|
102
|
+
[function_name]
|
103
|
+
else
|
104
|
+
summary_keys.map { |key| "#{function_name}(#{key})" }
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# @group_counts.sum == @dataframe.size
|
109
|
+
def group_counts
|
110
|
+
@group_counts ||= filters.map(&:sum)
|
111
|
+
end
|
112
|
+
|
113
|
+
def base_table
|
114
|
+
@base_table ||= begin
|
115
|
+
indexes = filters.map { |filter| filter.index(true) }
|
116
|
+
@dataframe.table[@group_keys].take(indexes)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def add_columns_to_table(table, keys, data_arrays)
|
121
|
+
fields = table.schema.fields
|
122
|
+
arrays = table.columns.map(&:data)
|
123
|
+
|
124
|
+
keys.zip(data_arrays).each do |key, array|
|
125
|
+
data = Arrow::ChunkedArray.new([array])
|
126
|
+
fields << Arrow::Field.new(key, data.value_data_type)
|
127
|
+
arrays << data
|
128
|
+
end
|
129
|
+
|
130
|
+
Arrow::Table.new(Arrow::Schema.new(fields), arrays)
|
131
|
+
end
|
55
132
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
133
|
+
# Call Vector aggregating function and return an array of arrays:
|
134
|
+
# [keys, data_arrays]
|
135
|
+
# (Experimental feature)
|
136
|
+
def call_aggregating_function(func, summary_keys, _options)
|
137
|
+
summary_keys.each.with_object([[], []]) do |key, (keys, arrays)|
|
138
|
+
vector = @dataframe[key]
|
139
|
+
arrays << filters.map { |filter| vector.filter(filter).send(func) }
|
140
|
+
keys << "#{func}(#{key})".to_sym
|
141
|
+
rescue Arrow::Error::NotImplemented
|
142
|
+
# next
|
143
|
+
end
|
61
144
|
end
|
62
145
|
end
|
63
146
|
end
|
data/lib/red_amber/helper.rb
CHANGED
@@ -9,52 +9,42 @@ module RedAmber
|
|
9
9
|
num > 1 ? 's' : ''
|
10
10
|
end
|
11
11
|
|
12
|
-
def out_of_range?(indeces)
|
13
|
-
indeces.max >= size || indeces.min < -size
|
14
|
-
end
|
15
|
-
|
16
|
-
def integers?(enum)
|
17
|
-
enum.all?(Integer)
|
18
|
-
end
|
19
|
-
|
20
|
-
def sym_or_str?(enum)
|
21
|
-
enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
|
22
|
-
end
|
23
|
-
|
24
12
|
def booleans?(enum)
|
25
13
|
enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
|
26
14
|
end
|
27
15
|
|
28
|
-
def
|
29
|
-
DataFrame.new(key => vector.data)
|
30
|
-
end
|
31
|
-
|
32
|
-
def parse_to_vector(args)
|
16
|
+
def parse_to_vector(args, vsize: size)
|
33
17
|
a = args.reduce([]) do |accum, elem|
|
34
|
-
accum.concat(normalize_element(elem))
|
18
|
+
accum.concat(normalize_element(elem, vsize: vsize))
|
35
19
|
end
|
36
20
|
Vector.new(a)
|
37
21
|
end
|
38
22
|
|
39
|
-
def normalize_element(elem)
|
23
|
+
def normalize_element(elem, vsize: size)
|
40
24
|
case elem
|
41
|
-
when
|
42
|
-
[
|
25
|
+
when NilClass
|
26
|
+
[nil]
|
43
27
|
when Range
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
28
|
+
bg = elem.begin
|
29
|
+
en = elem.end
|
30
|
+
if [bg, en].any?(Integer)
|
31
|
+
bg += vsize if bg&.negative?
|
32
|
+
en += vsize if en&.negative?
|
33
|
+
en -= 1 if en.is_a?(Integer) && elem.exclude_end?
|
34
|
+
if bg&.negative? || (en && en >= vsize)
|
35
|
+
raise DataFrameArgumentError, "Index out of range: #{elem} for 0..#{vsize - 1}"
|
50
36
|
end
|
51
37
|
|
52
|
-
(0...
|
38
|
+
Array(0...vsize)[elem]
|
39
|
+
elsif bg.nil? && en.nil?
|
40
|
+
Array(0...vsize)
|
53
41
|
else
|
54
|
-
elem
|
42
|
+
Array(elem)
|
55
43
|
end
|
44
|
+
when Enumerator
|
45
|
+
elem.to_a
|
56
46
|
else
|
57
|
-
Array
|
47
|
+
Array[elem]
|
58
48
|
end
|
59
49
|
end
|
60
50
|
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -11,31 +11,39 @@ module RedAmber
|
|
11
11
|
include Helper
|
12
12
|
|
13
13
|
def initialize(*array)
|
14
|
-
@key = nil # default is 'headless'
|
15
|
-
if array.empty? || array
|
14
|
+
@key = nil # default is 'headless' Vector
|
15
|
+
if array.empty? || array.first.nil?
|
16
16
|
Vector.new([])
|
17
17
|
else
|
18
18
|
array.flatten!
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
19
|
+
@data =
|
20
|
+
case array
|
21
|
+
in [Vector => v]
|
22
|
+
v.data
|
23
|
+
in [Arrow::Array => a]
|
24
|
+
a
|
25
|
+
in [Arrow::ChunkedArray => ca]
|
26
|
+
ca
|
27
|
+
in [arrow_array_like] if arrow_array_like.respond_to?(:to_arrow_array)
|
28
|
+
arrow_array_like.to_arrow_array
|
29
|
+
in [Range => r]
|
30
|
+
Arrow::Array.new(Array(r))
|
31
|
+
else
|
32
|
+
begin
|
33
|
+
Arrow::Array.new(Array(array))
|
34
|
+
rescue Error
|
35
|
+
raise VectorArgumentError, "Invalid argument: #{array}"
|
36
|
+
end
|
37
|
+
end
|
35
38
|
end
|
36
39
|
end
|
37
40
|
|
38
41
|
attr_reader :data
|
42
|
+
|
43
|
+
def to_arrow_array
|
44
|
+
@data
|
45
|
+
end
|
46
|
+
|
39
47
|
attr_accessor :key
|
40
48
|
|
41
49
|
def to_s
|
@@ -43,19 +51,24 @@ module RedAmber
|
|
43
51
|
end
|
44
52
|
|
45
53
|
def inspect(limit: 80)
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
s
|
53
|
-
|
54
|
+
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
|
55
|
+
# Better performance than `.upcase == 'MINIMUM'`
|
56
|
+
"#{self.class}(:#{type}, size=#{size})"
|
57
|
+
else
|
58
|
+
sio = StringIO.new << '['
|
59
|
+
to_a.each_with_object(sio).with_index do |(e, s), i|
|
60
|
+
next_str = "#{s.size > 1 ? ', ' : ''}#{e.inspect}"
|
61
|
+
if (s.size + next_str.size) < limit
|
62
|
+
s << next_str
|
63
|
+
else
|
64
|
+
s << ', ... ' if i < size
|
65
|
+
break
|
66
|
+
end
|
54
67
|
end
|
55
|
-
|
56
|
-
sio << ']'
|
68
|
+
sio << ']'
|
57
69
|
|
58
|
-
|
70
|
+
format "#<#{self.class}(:#{type}, size=#{size}):0x%016x>\n%s\n", object_id, sio.string
|
71
|
+
end
|
59
72
|
end
|
60
73
|
|
61
74
|
def values
|
@@ -71,7 +84,7 @@ module RedAmber
|
|
71
84
|
alias_method :indeces, :indices
|
72
85
|
|
73
86
|
def to_ary
|
74
|
-
|
87
|
+
values
|
75
88
|
end
|
76
89
|
|
77
90
|
def size
|
@@ -110,6 +123,10 @@ module RedAmber
|
|
110
123
|
type_class == Arrow::StringDataType
|
111
124
|
end
|
112
125
|
|
126
|
+
def dictionary?
|
127
|
+
type_class == Arrow::DictionaryDataType
|
128
|
+
end
|
129
|
+
|
113
130
|
def temporal?
|
114
131
|
type_class < Arrow::TemporalDataType
|
115
132
|
end
|
@@ -126,10 +143,19 @@ module RedAmber
|
|
126
143
|
end
|
127
144
|
end
|
128
145
|
|
146
|
+
def map(&block)
|
147
|
+
return enum_for(:map) unless block
|
148
|
+
|
149
|
+
Vector.new(to_a.map(&block))
|
150
|
+
end
|
151
|
+
alias_method :collect, :map
|
152
|
+
|
153
|
+
# undocumented
|
129
154
|
def chunked?
|
130
155
|
@data.is_a? Arrow::ChunkedArray
|
131
156
|
end
|
132
157
|
|
158
|
+
# undocumented
|
133
159
|
def n_chunks
|
134
160
|
chunked? ? @data.n_chunks : 0
|
135
161
|
end
|
@@ -187,12 +187,6 @@ module RedAmber
|
|
187
187
|
alias_method :ne, :not_equal
|
188
188
|
|
189
189
|
def coerce(other)
|
190
|
-
case other
|
191
|
-
when Vector, Array, Arrow::Array
|
192
|
-
raise VectorArgumentError, "Size unmatch: #{size} != #{other.length}" unless size == other.length
|
193
|
-
|
194
|
-
[Vector.new(Array(other)), self]
|
195
|
-
end
|
196
190
|
[Vector.new(Array(other) * size), self]
|
197
191
|
end
|
198
192
|
|
@@ -271,8 +265,6 @@ module RedAmber
|
|
271
265
|
find(function).execute([data, other.data], options)
|
272
266
|
when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar, Array, Numeric, String, TrueClass, FalseClass
|
273
267
|
find(function).execute([data, other], options)
|
274
|
-
else
|
275
|
-
raise VectorArgumentError, "Operand is not supported: #{other.class}"
|
276
268
|
end
|
277
269
|
end
|
278
270
|
|