red_amber 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +15 -0
  3. data/CHANGELOG.md +170 -20
  4. data/Gemfile +4 -2
  5. data/README.md +121 -302
  6. data/benchmark/basic.yml +79 -0
  7. data/benchmark/combine.yml +63 -0
  8. data/benchmark/drop_nil.yml +15 -3
  9. data/benchmark/group.yml +33 -0
  10. data/benchmark/reshape.yml +27 -0
  11. data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
  12. data/benchmark/rover/flights.yml +23 -0
  13. data/benchmark/rover/penguins.yml +23 -0
  14. data/benchmark/rover/planes.yml +23 -0
  15. data/benchmark/rover/weather.yml +23 -0
  16. data/doc/DataFrame.md +611 -318
  17. data/doc/Vector.md +31 -36
  18. data/doc/image/basic_verbs.png +0 -0
  19. data/doc/image/dataframe/assign.png +0 -0
  20. data/doc/image/dataframe/assign_operation.png +0 -0
  21. data/doc/image/dataframe/drop.png +0 -0
  22. data/doc/image/dataframe/join.png +0 -0
  23. data/doc/image/dataframe/pick.png +0 -0
  24. data/doc/image/dataframe/pick_operation.png +0 -0
  25. data/doc/image/dataframe/remove.png +0 -0
  26. data/doc/image/dataframe/rename.png +0 -0
  27. data/doc/image/dataframe/rename_operation.png +0 -0
  28. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  29. data/doc/image/dataframe/set_and_bind.png +0 -0
  30. data/doc/image/dataframe/slice.png +0 -0
  31. data/doc/image/dataframe/slice_operation.png +0 -0
  32. data/doc/image/dataframe_model.png +0 -0
  33. data/doc/image/group_operation.png +0 -0
  34. data/doc/image/replace-if_then.png +0 -0
  35. data/doc/image/reshaping_dataframe.png +0 -0
  36. data/doc/image/screenshot.png +0 -0
  37. data/doc/image/vector/binary_element_wise.png +0 -0
  38. data/doc/image/vector/unary_aggregation.png +0 -0
  39. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  40. data/doc/image/vector/unary_element_wise.png +0 -0
  41. data/lib/red_amber/data_frame.rb +16 -42
  42. data/lib/red_amber/data_frame_combinable.rb +283 -0
  43. data/lib/red_amber/data_frame_displayable.rb +58 -3
  44. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  45. data/lib/red_amber/data_frame_reshaping.rb +8 -6
  46. data/lib/red_amber/data_frame_selectable.rb +9 -9
  47. data/lib/red_amber/data_frame_variable_operation.rb +27 -21
  48. data/lib/red_amber/group.rb +100 -17
  49. data/lib/red_amber/helper.rb +20 -30
  50. data/lib/red_amber/vector.rb +56 -30
  51. data/lib/red_amber/vector_functions.rb +0 -8
  52. data/lib/red_amber/vector_selectable.rb +9 -1
  53. data/lib/red_amber/vector_updatable.rb +61 -63
  54. data/lib/red_amber/version.rb +1 -1
  55. data/lib/red_amber.rb +2 -0
  56. data/red_amber.gemspec +1 -1
  57. metadata +32 -11
  58. data/doc/examples_of_red_amber.ipynb +0 -8979
@@ -37,8 +37,12 @@ module RedAmber
37
37
  alias_method :describe, :summary
38
38
 
39
39
  def inspect
40
- if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
40
+ mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
41
+ case mode.upcase
42
+ when 'TDR'
41
43
  "#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
44
+ when 'MINIMUM'
45
+ shape_str
42
46
  else
43
47
  "#<#{shape_str(with_id: true)}>\n#{self}"
44
48
  end
@@ -55,6 +59,23 @@ module RedAmber
55
59
  "#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
56
60
  end
57
61
 
62
+ def to_iruby
63
+ require 'iruby'
64
+ return ['text/plain', '(empty DataFrame)'] if empty?
65
+
66
+ mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
67
+ case mode.upcase
68
+ when 'PLAIN'
69
+ ['text/plain', inspect]
70
+ when 'MINIMUM'
71
+ ['text/plain', shape_str]
72
+ when 'TDR'
73
+ size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
74
+ else # 'TABLE'
75
+ ['text/html', html_table]
76
+ end
77
+ end
78
+
58
79
  private # =====
59
80
 
60
81
  def shape_str(with_id: false)
@@ -98,7 +119,7 @@ module RedAmber
98
119
  else
99
120
  [shorthand(vector, size, max_element)]
100
121
  end
101
- sio.printf header_format, i + 1, key, type, data_tally.size, a.join(', ')
122
+ sio.printf header_format, i, key, type, data_tally.size, a.join(', ')
102
123
  end
103
124
  sio.string
104
125
  end
@@ -153,10 +174,12 @@ module RedAmber
153
174
  end
154
175
 
155
176
  def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
177
+ return " #{keys.join(' ')}\n (Empty Vectors)\n" if size.zero?
178
+
156
179
  original = self
157
180
  indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
158
181
  df = slice(indices).assign do
159
- assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
182
+ assigner = { INDEX_KEY => indices.map(&:to_s) }
160
183
  vectors.each_with_object(assigner) do |v, a|
161
184
  a[v.key] = v.to_a.map do |e|
162
185
  if e.nil?
@@ -220,5 +243,37 @@ module RedAmber
220
243
  "%#{width}s"
221
244
  end
222
245
  end
246
+
247
+ def html_table
248
+ reduced = size > 8 ? self[0..4, -4..-1] : self
249
+
250
+ converted = reduced.assign do
251
+ vectors.select.with_object({}) do |vector, assigner|
252
+ assigner[vector.key] = vector.map do |element|
253
+ case element
254
+ in TrueClass
255
+ '<i>(true)</i>'
256
+ in FalseClass
257
+ '<i>(false)</i>'
258
+ in NilClass
259
+ '<i>(nil)</i>'
260
+ in ''
261
+ '""'
262
+ in String
263
+ element.sub(/^(\s+)$/, '"\1"') # blank spaces
264
+ in Float
265
+ format('%g', element)
266
+ in Integer
267
+ format('%d', element)
268
+ else
269
+ element
270
+ end
271
+ end
272
+ end
273
+ end
274
+
275
+ html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
276
+ "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
277
+ end
223
278
  end
224
279
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedAmber
4
+ # mix-ins for the class DataFrame
5
+ module DataFrameLoadSave
6
+ # Enable `self.load` as class method of DataFrame
7
+ def self.included(klass)
8
+ klass.extend ClassMethods
9
+ end
10
+
11
+ # Enable `self.load` as class method of DataFrame
12
+ module ClassMethods
13
+ # Load DataFrame via Arrow::Table.load
14
+ def load(path, options = {})
15
+ DataFrame.new(Arrow::Table.load(path, options))
16
+ end
17
+ end
18
+
19
+ # Save DataFrame
20
+ def save(output, options = {})
21
+ @table.save(output, options)
22
+ end
23
+
24
+ # Save and reload to cast automatically
25
+ # Via tsv format file temporally as default
26
+ #
27
+ # experimental feature
28
+ def auto_cast(format: :tsv)
29
+ return self if empty?
30
+
31
+ tempfile = Arrow::ResizableBuffer.new(1024)
32
+ save(tempfile, format: format)
33
+ DataFrame.load(tempfile, format: format)
34
+ end
35
+ end
36
+ end
@@ -9,16 +9,17 @@ module RedAmber
9
9
  # to transepose into keys.
10
10
  # If it is not specified, keys[0] is used.
11
11
  # @param new_key [Symbol] key name of transposed index column.
12
- # If it is not specified, :N is used. If it already exists, :N1 or :N1.succ is used.
12
+ # If it is not specified, :NAME is used. If it already exists, :NAME1 or :NAME1.succ is used.
13
13
  # @return [DataFrame] trnsposed DataFrame
14
- def transpose(key: keys.first, name: :N)
14
+ def transpose(key: keys.first, name: :NAME)
15
15
  raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
16
16
 
17
17
  # Find unused name
18
18
  new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
19
- name = (:N1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
19
+ name = (:NAME1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
20
20
 
21
- hash = { name => (keys - [key]) }
21
+ names = (keys - [key]).map { |x| x&.to_s }
22
+ hash = { name => names }
22
23
  i = keys.index(key)
23
24
  each_row do |h|
24
25
  k = h.values[i]
@@ -33,7 +34,7 @@ module RedAmber
33
34
  # @param name [Symbol, String] key of the column which is come **from values**.
34
35
  # @param value [Symbol, String] key of the column which is come **from values**.
35
36
  # @return [DataFrame] long DataFrame.
36
- def to_long(*keep_keys, name: :N, value: :V)
37
+ def to_long(*keep_keys, name: :NAME, value: :VALUE)
37
38
  not_included = keep_keys - keys
38
39
  raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
39
40
 
@@ -55,6 +56,7 @@ module RedAmber
55
56
  end
56
57
  end
57
58
  end
59
+ hash[name] = hash[name].map { |x| x&.to_s }
58
60
  DataFrame.new(hash)
59
61
  end
60
62
 
@@ -63,7 +65,7 @@ module RedAmber
63
65
  # @param name [Symbol, String] key of the column which will be expanded **to key names**.
64
66
  # @param value [Symbol, String] key of the column which will be expanded **to values**.
65
67
  # @return [DataFrame] wide DataFrame.
66
- def to_wide(name: :N, value: :V)
68
+ def to_wide(name: :NAME, value: :VALUE)
67
69
  name = name.to_sym
68
70
  raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
69
71
 
@@ -17,7 +17,7 @@ module RedAmber
17
17
  raise DataFrameArgumentError, "Size is not match in booleans: #{args}"
18
18
  end
19
19
  return take_by_array(vector) if vector.numeric?
20
- return select_vars_by_keys(vector.to_a.map(&:to_sym)) if vector.string? || vector.type == :dictionary
20
+ return select_vars_by_keys(vector.to_a.map(&:to_sym)) if vector.string? || vector.dictionary?
21
21
 
22
22
  raise DataFrameArgumentError, "Invalid argument: #{args}"
23
23
  end
@@ -118,10 +118,10 @@ module RedAmber
118
118
  end
119
119
 
120
120
  normalized_indices = normalized_indices.floor.to_a.map(&:to_i) # round to integer array
121
- return remove_all_values if normalized_indices == indices
121
+ return remove_all_values if normalized_indices == indices.to_a
122
122
  return self if normalized_indices.empty?
123
123
 
124
- index_array = indices - normalized_indices
124
+ index_array = indices.to_a - normalized_indices
125
125
 
126
126
  datum = Arrow::Function.find(:take).execute([table, index_array])
127
127
  return DataFrame.new(datum.value)
@@ -168,14 +168,14 @@ module RedAmber
168
168
 
169
169
  # Undocumented
170
170
  # TODO: support for option {boundscheck: true}
171
- def take(*indices)
172
- indices.flatten!
173
- return remove_all_values if indices.empty?
171
+ def take(*arg_indices)
172
+ arg_indices.flatten!
173
+ return remove_all_values if arg_indices.empty?
174
174
 
175
- indices = indices[0] if indices.one? && !indices[0].is_a?(Numeric)
176
- indices = Vector.new(indices) unless indices.is_a?(Vector)
175
+ arg_indices = arg_indices[0] if arg_indices.one? && !arg_indices[0].is_a?(Numeric)
176
+ arg_indices = Vector.new(arg_indices) unless arg_indices.is_a?(Vector)
177
177
 
178
- take_by_array(indices)
178
+ take_by_array(arg_indices)
179
179
  end
180
180
 
181
181
  # Undocumented
@@ -15,16 +15,22 @@ module RedAmber
15
15
  return DataFrame.new if picker.empty? || picker == [nil]
16
16
 
17
17
  key_vector = Vector.new(keys)
18
- picker_vector = parse_to_vector(picker)
19
-
20
- picker = key_vector.filter(*picker_vector).to_a if picker_vector.boolean?
21
- picker = key_vector.take(*picker_vector).to_a if picker_vector.numeric?
18
+ vec = parse_to_vector(picker, vsize: n_keys)
19
+
20
+ ary =
21
+ if vec.boolean?
22
+ key_vector.filter(*vec).to_a
23
+ elsif vec.numeric?
24
+ key_vector.take(*vec).to_a
25
+ elsif vec.string? || vec.dictionary?
26
+ vec.to_a
27
+ else
28
+ raise DataFrameArgumentError, "Invalid argument #{args}"
29
+ end
22
30
 
23
- # DataFrame#[] creates a Vector with single key is specified.
31
+ # DataFrame#[] creates a Vector if single key is specified.
24
32
  # DataFrame#pick creates a DataFrame with single key.
25
- return DataFrame.new(@table[picker]) if sym_or_str?(picker)
26
-
27
- raise DataFrameArgumentError, "Invalid argument #{args}"
33
+ DataFrame.new(@table[ary])
28
34
  end
29
35
 
30
36
  # drop some variables to create remainer sub DataFrame
@@ -38,24 +44,24 @@ module RedAmber
38
44
  dropper.flatten!
39
45
 
40
46
  key_vector = Vector.new(keys)
41
- dropper_vector = parse_to_vector(dropper)
42
-
43
- picker =
44
- if dropper_vector.boolean?
45
- key_vector.filter(*dropper_vector.primitive_invert).each.map(&:to_sym)
46
- elsif dropper_vector.numeric?
47
- keys - key_vector.take(*dropper_vector).each.map(&:to_sym)
47
+ vec = parse_to_vector(dropper, vsize: n_keys)
48
+
49
+ ary =
50
+ if vec.boolean?
51
+ key_vector.filter(*vec.primitive_invert).each.map(&:to_sym) # Array
52
+ elsif vec.numeric?
53
+ keys - key_vector.take(*vec).each.map(&:to_sym) # Array
54
+ elsif vec.string? || vec.dictionary?
55
+ keys - vec.to_a.map { _1&.to_sym } # Array
48
56
  else
49
- keys - dropper
57
+ raise DataFrameArgumentError, "Invalid argument #{args}"
50
58
  end
51
59
 
52
- return DataFrame.new if picker.empty?
60
+ return DataFrame.new if ary.empty?
53
61
 
54
- # DataFrame#[] creates a Vector with single key is specified.
62
+ # DataFrame#[] creates a Vector if single key is specified.
55
63
  # DataFrame#drop creates a DataFrame with single key.
56
- return DataFrame.new(@table[picker]) if sym_or_str?(picker)
57
-
58
- raise DataFrameArgumentError, "Invalid argument #{args}"
64
+ DataFrame.new(@table[ary])
59
65
  end
60
66
 
61
67
  # rename variables to create a new DataFrame
@@ -3,35 +3,84 @@
3
3
  module RedAmber
4
4
  # group class
5
5
  class Group
6
+ include Enumerable # This feature is experimental
7
+
6
8
  # Creates a new Group object.
7
9
  #
8
10
  # @param dataframe [DataFrame] dataframe to be grouped.
9
11
  # @param group_keys [Array<>] keys for grouping.
10
12
  def initialize(dataframe, *group_keys)
11
13
  @dataframe = dataframe
12
- @table = @dataframe.table
13
14
  @group_keys = group_keys.flatten
14
15
 
15
- raise GroupArgumentError, 'group_keys is empty.' if @group_keys.empty?
16
+ raise GroupArgumentError, 'group_keys are empty.' if @group_keys.empty?
16
17
 
17
18
  d = @group_keys - @dataframe.keys
18
19
  raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty?
19
20
 
20
- @group = @table.group(*@group_keys)
21
+ @filters = @group_counts = @base_table = nil
22
+ @group = @dataframe.table.group(*@group_keys)
21
23
  end
22
24
 
25
+ attr_reader :dataframe, :group_keys
26
+
23
27
  functions = %i[count sum product mean min max stddev variance]
24
28
  functions.each do |function|
25
29
  define_method(function) do |*summary_keys|
26
- by(function, summary_keys)
30
+ summary_keys = Array(summary_keys).flatten
31
+ d = summary_keys - @dataframe.keys
32
+ raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
33
+
34
+ table = @group.aggregate(*build_aggregation_keys("hash_#{function}", summary_keys))
35
+ df = DataFrame.new(table)
36
+ df.pick(@group_keys, df.keys - @group_keys)
27
37
  end
28
38
  end
29
39
 
30
- def inspect
31
- tallys = @dataframe.pick(@group_keys).vectors.map.with_object({}) do |v, h|
32
- h[v.key] = v.tally
40
+ alias_method :__count, :count
41
+ private :__count
42
+
43
+ def count(*summary_keys)
44
+ df = __count(summary_keys)
45
+ # if counts are the same (and do not include NaN or nil), aggregate count columns.
46
+ if df.pick(@group_keys.size..).to_h.values.uniq.size == 1
47
+ df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
48
+ else
49
+ df
33
50
  end
34
- "#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallys}>"
51
+ end
52
+
53
+ def filters
54
+ @filters ||= begin
55
+ first, *others = @group_keys.map do |key|
56
+ vector = @dataframe[key]
57
+ vector.uniq.each.map { |u| u.nil? ? vector.is_nil : vector == u }
58
+ end
59
+
60
+ if others.empty?
61
+ first.select(&:any?)
62
+ else
63
+ first.product(*others).map { |a| a.reduce(&:&) }.select(&:any?)
64
+ end
65
+ end
66
+ end
67
+
68
+ def each
69
+ filters
70
+ return enum_for(:each) unless block_given?
71
+
72
+ @filters.each do |filter|
73
+ yield @dataframe[filter]
74
+ end
75
+ @filters.size
76
+ end
77
+
78
+ def group_count
79
+ DataFrame.new(add_columns_to_table(base_table, [:group_count], [group_counts]))
80
+ end
81
+
82
+ def inspect
83
+ "#<#{self.class} : #{format('0x%016x', object_id)}>\n#{group_count}"
35
84
  end
36
85
 
37
86
  def summarize(&block)
@@ -48,16 +97,50 @@ module RedAmber
48
97
 
49
98
  private
50
99
 
51
- def by(func, summary_keys)
52
- summary_keys = Array(summary_keys).flatten
53
- d = summary_keys - @dataframe.keys
54
- raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
100
+ def build_aggregation_keys(function_name, summary_keys)
101
+ if summary_keys.empty?
102
+ [function_name]
103
+ else
104
+ summary_keys.map { |key| "#{function_name}(#{key})" }
105
+ end
106
+ end
107
+
108
+ # @group_counts.sum == @dataframe.size
109
+ def group_counts
110
+ @group_counts ||= filters.map(&:sum)
111
+ end
112
+
113
+ def base_table
114
+ @base_table ||= begin
115
+ indexes = filters.map { |filter| filter.index(true) }
116
+ @dataframe.table[@group_keys].take(indexes)
117
+ end
118
+ end
119
+
120
+ def add_columns_to_table(table, keys, data_arrays)
121
+ fields = table.schema.fields
122
+ arrays = table.columns.map(&:data)
123
+
124
+ keys.zip(data_arrays).each do |key, array|
125
+ data = Arrow::ChunkedArray.new([array])
126
+ fields << Arrow::Field.new(key, data.value_data_type)
127
+ arrays << data
128
+ end
129
+
130
+ Arrow::Table.new(Arrow::Schema.new(fields), arrays)
131
+ end
55
132
 
56
- df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
57
- df = df[@group_keys, df.keys - @group_keys]
58
- # if counts are the same (no nil included), aggregate count columns.
59
- df = df[df.keys[0..1]].rename(df.keys[1], :count) if func == :count && df.to_h.values[1..].uniq.size == 1
60
- df
133
+ # Call Vector aggregating function and return an array of arrays:
134
+ # [keys, data_arrays]
135
+ # (Experimental feature)
136
+ def call_aggregating_function(func, summary_keys, _options)
137
+ summary_keys.each.with_object([[], []]) do |key, (keys, arrays)|
138
+ vector = @dataframe[key]
139
+ arrays << filters.map { |filter| vector.filter(filter).send(func) }
140
+ keys << "#{func}(#{key})".to_sym
141
+ rescue Arrow::Error::NotImplemented
142
+ # next
143
+ end
61
144
  end
62
145
  end
63
146
  end
@@ -9,52 +9,42 @@ module RedAmber
9
9
  num > 1 ? 's' : ''
10
10
  end
11
11
 
12
- def out_of_range?(indeces)
13
- indeces.max >= size || indeces.min < -size
14
- end
15
-
16
- def integers?(enum)
17
- enum.all?(Integer)
18
- end
19
-
20
- def sym_or_str?(enum)
21
- enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
22
- end
23
-
24
12
  def booleans?(enum)
25
13
  enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
26
14
  end
27
15
 
28
- def create_dataframe_from_vector(key, vector)
29
- DataFrame.new(key => vector.data)
30
- end
31
-
32
- def parse_to_vector(args)
16
+ def parse_to_vector(args, vsize: size)
33
17
  a = args.reduce([]) do |accum, elem|
34
- accum.concat(normalize_element(elem))
18
+ accum.concat(normalize_element(elem, vsize: vsize))
35
19
  end
36
20
  Vector.new(a)
37
21
  end
38
22
 
39
- def normalize_element(elem)
23
+ def normalize_element(elem, vsize: size)
40
24
  case elem
41
- when Numeric, String, Symbol, TrueClass, FalseClass, NilClass
42
- [elem]
25
+ when NilClass
26
+ [nil]
43
27
  when Range
44
- both_end = [elem.begin, elem.end]
45
- both_end[1] -= 1 if elem.exclude_end? && elem.end.is_a?(Integer)
46
-
47
- if both_end.any?(Integer) || both_end.all?(&:nil?)
48
- if both_end.any? { |e| e&.>=(size) || e&.<(-size) }
49
- raise DataFrameArgumentError, "Index out of range: #{elem} for 0..#{size - 1}"
28
+ bg = elem.begin
29
+ en = elem.end
30
+ if [bg, en].any?(Integer)
31
+ bg += vsize if bg&.negative?
32
+ en += vsize if en&.negative?
33
+ en -= 1 if en.is_a?(Integer) && elem.exclude_end?
34
+ if bg&.negative? || (en && en >= vsize)
35
+ raise DataFrameArgumentError, "Index out of range: #{elem} for 0..#{vsize - 1}"
50
36
  end
51
37
 
52
- (0...size).to_a[elem]
38
+ Array(0...vsize)[elem]
39
+ elsif bg.nil? && en.nil?
40
+ Array(0...vsize)
53
41
  else
54
- elem.to_a
42
+ Array(elem)
55
43
  end
44
+ when Enumerator
45
+ elem.to_a
56
46
  else
57
- Array(elem)
47
+ Array[elem]
58
48
  end
59
49
  end
60
50
  end
@@ -11,31 +11,39 @@ module RedAmber
11
11
  include Helper
12
12
 
13
13
  def initialize(*array)
14
- @key = nil # default is 'headless'
15
- if array.empty? || array[0].nil?
14
+ @key = nil # default is 'headless' Vector
15
+ if array.empty? || array.first.nil?
16
16
  Vector.new([])
17
17
  else
18
18
  array.flatten!
19
- case array[0]
20
- when Vector
21
- @data = array[0].data
22
- return
23
- when Arrow::Array, Arrow::ChunkedArray
24
- @data = array[0]
25
- return
26
- when Range
27
- @data = Arrow::Array.new(Array(array[0]))
28
- return
29
- end
30
- begin
31
- @data = Arrow::Array.new(Array(array))
32
- rescue Error
33
- raise VectorArgumentError, "Invalid argument: #{array}"
34
- end
19
+ @data =
20
+ case array
21
+ in [Vector => v]
22
+ v.data
23
+ in [Arrow::Array => a]
24
+ a
25
+ in [Arrow::ChunkedArray => ca]
26
+ ca
27
+ in [arrow_array_like] if arrow_array_like.respond_to?(:to_arrow_array)
28
+ arrow_array_like.to_arrow_array
29
+ in [Range => r]
30
+ Arrow::Array.new(Array(r))
31
+ else
32
+ begin
33
+ Arrow::Array.new(Array(array))
34
+ rescue Error
35
+ raise VectorArgumentError, "Invalid argument: #{array}"
36
+ end
37
+ end
35
38
  end
36
39
  end
37
40
 
38
41
  attr_reader :data
42
+
43
+ def to_arrow_array
44
+ @data
45
+ end
46
+
39
47
  attr_accessor :key
40
48
 
41
49
  def to_s
@@ -43,19 +51,24 @@ module RedAmber
43
51
  end
44
52
 
45
53
  def inspect(limit: 80)
46
- sio = StringIO.new << '['
47
- to_a.each_with_object(sio).with_index do |(e, s), i|
48
- next_str = "#{s.size > 1 ? ', ' : ''}#{e.inspect}"
49
- if (s.size + next_str.size) < limit
50
- s << next_str
51
- else
52
- s << ', ... ' if i < size
53
- break
54
+ if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
55
+ # Better performance than `.upcase == 'MINIMUM'``
56
+ "#{self.class}(:#{type}, size=#{size})"
57
+ else
58
+ sio = StringIO.new << '['
59
+ to_a.each_with_object(sio).with_index do |(e, s), i|
60
+ next_str = "#{s.size > 1 ? ', ' : ''}#{e.inspect}"
61
+ if (s.size + next_str.size) < limit
62
+ s << next_str
63
+ else
64
+ s << ', ... ' if i < size
65
+ break
66
+ end
54
67
  end
55
- end
56
- sio << ']'
68
+ sio << ']'
57
69
 
58
- format "#<#{self.class}(:#{type}, size=#{size}):0x%016x>\n%s\n", object_id, sio.string
70
+ format "#<#{self.class}(:#{type}, size=#{size}):0x%016x>\n%s\n", object_id, sio.string
71
+ end
59
72
  end
60
73
 
61
74
  def values
@@ -71,7 +84,7 @@ module RedAmber
71
84
  alias_method :indeces, :indices
72
85
 
73
86
  def to_ary
74
- to_a
87
+ values
75
88
  end
76
89
 
77
90
  def size
@@ -110,6 +123,10 @@ module RedAmber
110
123
  type_class == Arrow::StringDataType
111
124
  end
112
125
 
126
+ def dictionary?
127
+ type_class == Arrow::DictionaryDataType
128
+ end
129
+
113
130
  def temporal?
114
131
  type_class < Arrow::TemporalDataType
115
132
  end
@@ -126,10 +143,19 @@ module RedAmber
126
143
  end
127
144
  end
128
145
 
146
+ def map(&block)
147
+ return enum_for(:map) unless block
148
+
149
+ Vector.new(to_a.map(&block))
150
+ end
151
+ alias_method :collect, :map
152
+
153
+ # undocumented
129
154
  def chunked?
130
155
  @data.is_a? Arrow::ChunkedArray
131
156
  end
132
157
 
158
+ # undocumented
133
159
  def n_chunks
134
160
  chunked? ? @data.n_chunks : 0
135
161
  end
@@ -187,12 +187,6 @@ module RedAmber
187
187
  alias_method :ne, :not_equal
188
188
 
189
189
  def coerce(other)
190
- case other
191
- when Vector, Array, Arrow::Array
192
- raise VectorArgumentError, "Size unmatch: #{size} != #{other.length}" unless size == other.length
193
-
194
- [Vector.new(Array(other)), self]
195
- end
196
190
  [Vector.new(Array(other) * size), self]
197
191
  end
198
192
 
@@ -271,8 +265,6 @@ module RedAmber
271
265
  find(function).execute([data, other.data], options)
272
266
  when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar, Array, Numeric, String, TrueClass, FalseClass
273
267
  find(function).execute([data, other], options)
274
- else
275
- raise VectorArgumentError, "Operand is not supported: #{other.class}"
276
268
  end
277
269
  end
278
270