red_amber 0.1.5 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +33 -5
  3. data/.rubocop_todo.yml +2 -15
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +164 -18
  6. data/Gemfile +6 -1
  7. data/README.md +247 -33
  8. data/Rakefile +1 -0
  9. data/benchmark/csv_load_penguins.yml +1 -1
  10. data/doc/DataFrame.md +383 -219
  11. data/doc/Vector.md +247 -37
  12. data/doc/examples_of_red_amber.ipynb +5454 -0
  13. data/doc/image/dataframe/assign.png +0 -0
  14. data/doc/image/dataframe/drop.png +0 -0
  15. data/doc/image/dataframe/pick.png +0 -0
  16. data/doc/image/dataframe/remove.png +0 -0
  17. data/doc/image/dataframe/rename.png +0 -0
  18. data/doc/image/dataframe/slice.png +0 -0
  19. data/doc/image/dataframe_model.png +0 -0
  20. data/doc/image/vector/binary_element_wise.png +0 -0
  21. data/doc/image/vector/unary_aggregation.png +0 -0
  22. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  23. data/doc/image/vector/unary_element_wise.png +0 -0
  24. data/lib/red-amber.rb +3 -0
  25. data/lib/red_amber/data_frame.rb +62 -10
  26. data/lib/red_amber/data_frame_displayable.rb +86 -9
  27. data/lib/red_amber/data_frame_selectable.rb +151 -32
  28. data/lib/red_amber/data_frame_variable_operation.rb +4 -0
  29. data/lib/red_amber/group.rb +59 -0
  30. data/lib/red_amber/helper.rb +61 -0
  31. data/lib/red_amber/vector.rb +59 -15
  32. data/lib/red_amber/vector_functions.rb +47 -38
  33. data/lib/red_amber/vector_selectable.rb +126 -0
  34. data/lib/red_amber/vector_updatable.rb +125 -0
  35. data/lib/red_amber/version.rb +1 -1
  36. data/lib/red_amber.rb +6 -3
  37. data/red_amber.gemspec +0 -2
  38. metadata +9 -33
  39. data/lib/red_amber/data_frame_helper.rb +0 -64
  40. data/lib/red_amber/data_frame_observation_operation.rb +0 -83
  41. data/lib/red_amber/vector_compensable.rb +0 -68
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/lib/red-amber.rb ADDED
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'red_amber'
@@ -6,18 +6,14 @@ module RedAmber
6
6
  class DataFrame
7
7
  # mix-in
8
8
  include DataFrameDisplayable
9
- include DataFrameHelper
10
9
  include DataFrameIndexable
11
10
  include DataFrameSelectable
12
- include DataFrameObservationOperation
13
11
  include DataFrameVariableOperation
12
+ include Helper
14
13
 
15
14
  def initialize(*args)
16
15
  @variables = @keys = @vectors = @types = @data_types = nil
17
- # bug in gobject-introspection: ruby-gnome/ruby-gnome#1472
18
- # [Arrow::Table] == [nil] shows ArgumentError
19
- # temporary use yoda condition to workaround
20
- if args.empty? || args == [[]] || args == [{}] || [nil] == args
16
+ if args.empty? || args[0] == [] || args[0] == {} || args[0].nil?
21
17
  # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
22
18
  # returns empty DataFrame
23
19
  @table = Arrow::Table.new({}, [])
@@ -35,6 +31,7 @@ module RedAmber
35
31
  raise DataFrameTypeError, "invalid argument: #{arg}"
36
32
  end
37
33
  end
34
+ name_unnamed_keys
38
35
  end
39
36
 
40
37
  def self.load(path, options = {})
@@ -44,7 +41,7 @@ module RedAmber
44
41
  attr_reader :table
45
42
 
46
43
  def to_arrow
47
- table
44
+ @table
48
45
  end
49
46
 
50
47
  def save(output, options = {})
@@ -101,10 +98,10 @@ module RedAmber
101
98
  @vectors || @vectors = init_instance_vars(:vectors)
102
99
  end
103
100
 
104
- def indexes
105
- 0...size
101
+ def indices
102
+ (0...size).to_a
106
103
  end
107
- alias_method :indices, :indexes
104
+ alias_method :indexes, :indices
108
105
 
109
106
  def to_h
110
107
  variables.transform_values(&:to_a)
@@ -130,9 +127,27 @@ module RedAmber
130
127
  end
131
128
 
132
129
  def to_rover
130
+ require 'rover'
133
131
  Rover::DataFrame.new(to_h)
134
132
  end
135
133
 
134
# Returns a two-element Array [mime_type, content] describing this DataFrame.
# (Presumably consumed by IRuby's display machinery, judging by the method
# name and the lazy `require 'iruby'` -- confirm against IRuby docs.)
#
# - An empty DataFrame yields plain text '(empty DataFrame)'.
# - When ENV['RED_AMBER_OUTPUT_MODE'] is 'TDR', plain text from #tdr_str is
#   returned; with 5 or fewer observations it is called with `tally: 0`.
# - Otherwise (default mode 'Table') the HTML from #html_table is returned.
def to_iruby
  require 'iruby' # loaded lazily so the gem is only needed when this is called
  return ['text/plain', '(empty DataFrame)'] if empty?

  tdr_mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
  return ['text/html', html_table] unless tdr_mode

  content = size <= 5 ? tdr_str(tally: 0) : tdr_str
  ['text/plain', content]
end
144
+
145
# Creates a Group of self by `group_keys`.
#
# Without a block the Group object itself is returned. With a block, the
# block is handed to Group#summarize and that result is returned instead.
def group(*group_keys, &block)
  grouped = Group.new(self, group_keys)
  return grouped unless block

  grouped.summarize(&block)
end
150
+
136
151
  private
137
152
 
138
153
  # initialize @variable, @keys, @vectors and return one of them
@@ -148,5 +163,42 @@ module RedAmber
148
163
  @variables, @keys, @vectors = ary
149
164
  ary[%i[variables keys vectors].index(var)]
150
165
  end
166
+
167
# Builds an HTML rendering of this DataFrame using IRuby::HTML.table,
# prefixed with a "<size x n_keys vector(s)>" summary line.
#
# When there are more than 8 observations only rows 0..4 and the last 4 are
# handed to IRuby (which is itself called with maxrows: 8, maxcols: 15).
#
# Only vectors that contain nil are converted to display strings:
# - nil            -> '<i>(nil)</i>'
# - empty string   -> '""'
# - all-blank text -> the blanks wrapped in double quotes
#
# Fix: the blank-string pattern is now anchored to the whole string.
# The unanchored /(\s+)/ quoted the first whitespace run inside any value
# (e.g. "a b" became 'a" "b').
def html_table
  reduced = size > 8 ? self[0..4, -4..-1] : self

  converted = reduced.assign do
    vectors.each_with_object({}) do |vector, assigner|
      next unless vector.has_nil?

      assigner[vector.key] = vector.to_a.map do |element|
        text = element.nil? ? '<i>(nil)</i>' : element.to_s # nil
        text = '""' if text.empty? # empty string
        text.sub(/\A(\s+)\z/, '"\1"') # strings made only of blank spaces
      end
    end
  end

  html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
  "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
end
185
+
186
# Replaces empty field names in @table with generated names
# (:unnamed1, :unnamed2, ...), skipping names that are already taken.
# Does nothing when @table has no column named ''.
#
# Fix: each unnamed field now receives its own name. Previously a single
# generated name was reused for every empty-named field, so a table with
# two unnamed columns ended up with duplicate keys.
def name_unnamed_keys
  return unless @table[:'']

  # We can't use #keys because it causes mismatch of @table and @keys
  fields = @table.schema.fields
  taken = fields.map { |f| f.name.to_sym }
  candidate = :unnamed1

  renamed =
    fields.map do |field|
      next field unless field.name.empty?

      # advance to the first generated name not used so far
      candidate = candidate.succ while taken.include?(candidate)
      taken << candidate
      Arrow::Field.new(candidate, field.data_type)
    end

  @table = Arrow::Table.new(Arrow::Schema.new(renamed), @table.columns)
end
151
203
  end
152
204
  end
@@ -5,8 +5,12 @@ require 'stringio'
5
5
  module RedAmber
6
6
  # mix-ins for the class DataFrame
7
7
  module DataFrameDisplayable
8
+ INDEX_KEY = :index_key_for_format_table
9
+
8
10
  def to_s
9
- @table.to_s
11
+ return '' if empty?
12
+
13
+ format_table(width: 80)
10
14
  end
11
15
 
12
16
  # def describe() end
@@ -14,7 +18,11 @@ module RedAmber
14
18
  # def summary() end
15
19
 
16
20
  def inspect
17
- "#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
21
+ if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
22
+ "#<#{shape_str(with_id: true)}>\n#{dataframe_info(3)}"
23
+ else
24
+ "#<#{shape_str(with_id: true)}>\n#{self}"
25
+ end
18
26
  end
19
27
 
20
28
  # - limit: max num of Vectors to show
@@ -30,10 +38,6 @@ module RedAmber
30
38
 
31
39
  private # =====
32
40
 
33
- def pl(num)
34
- num > 1 ? 's' : ''
35
- end
36
-
37
41
  def shape_str(with_id: false)
38
42
  shape_info = empty? ? '(empty)' : "#{size} x #{n_keys} Vector#{pl(n_keys)}"
39
43
  id = with_id ? format(', 0x%016x', object_id) : ''
@@ -81,12 +85,12 @@ module RedAmber
81
85
  end
82
86
 
83
87
  def make_header_format(levels, headers, quoted_keys)
84
- # find longest word to adjust column width
88
+ # find longest word to adjust width
85
89
  w_idx = n_keys.to_s.size
86
90
  w_key = [quoted_keys.map(&:size).max, headers[:key].size].max
87
91
  w_type = [types.map(&:size).max, headers[:type].size].max
88
- w_row = [levels.map { |l| l.to_s.size }.max, headers[:levels].size].max
89
- "%-#{w_idx}s %-#{w_key}s %-#{w_type}s %#{w_row}s %s\n"
92
+ w_level = [levels.map { |l| l.to_s.size }.max, headers[:levels].size].max
93
+ "%-#{w_idx}s %-#{w_key}s %-#{w_type}s %#{w_level}s %s\n"
90
94
  end
91
95
 
92
96
  def type_group(data_type)
@@ -128,5 +132,78 @@ module RedAmber
128
132
  a << "#{n_nil} nil#{pl(n_nil)}" unless n_nil.zero?
129
133
  a
130
134
  end
135
+
136
# Formats self as a plain-text table and returns it as a String.
#
# Layout produced by the code below:
# - an index column (1-based observation numbers) is prepended;
# - row 0 holds the keys, row 1 holds "<type>" of each original vector;
# - when there are more than head + tail (5 + 3) observations, only the
#   first `head` and last `tail` are shown with a ':' marker row between;
# - floats are rounded to `n_digit` (1) decimals, nil is shown as '(nil)';
# - columns are added left to right until `width` would be exceeded; the
#   remaining ones are replaced by a '...' column followed by the last column.
#
# @param width [Integer] target maximum line width in characters.
# @return [String]
#
# Fix: whether the ':' marker row is written is decided once, from the size
# of self. The blocks given to #assign call `vectors`/`keys` without a
# receiver, so they appear to run in the context of the display frame; a
# bare `size` there is the padded frame's size (two header rows added), which
# made frames with 7 or 8 observations overwrite an observation with ':'.
def format_table(width: 80)
  head = 5
  tail = 3
  n_digit = 1

  original = self
  truncated = size > head + tail
  indices = truncated ? [*0...head, *(size - tail)...size] : [*0...size]

  # Convert every shown value to its display string and add the index column.
  df = slice(indices).assign do
    assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
    vectors.each_with_object(assigner) do |v, a|
      a[v.key] = v.to_a.map do |e|
        if e.nil?
          '(nil)'
        elsif v.float?
          e.round(n_digit).to_s
        elsif v.string?
          e
        else
          e.to_s
        end
      end
    end
  end

  # Index column first, then duplicate row 0 as placeholders for the
  # key row, the type row and (when truncated) the ':' marker row.
  df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
  df = truncated ? df[0, 0, 0...head, 0, -tail..-1] : df[0, 0, 0..-1]
  df = df.assign do
    vectors.each_with_object({}) do |v, assigner|
      vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
             .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
      assigner[v.key] = truncated ? vec.replace(head + 2, ':') : vec
    end
  end

  width_list = df.vectors.map { |v| v.to_a.map(&:length).max }
  total_length = width_list[-1] # reserved for last column

  # Collect one format directive per shown column; stop at the first column
  # that does not fit and emit '...' plus the last column instead.
  formats = []
  row_ellipsis = nil
  df.vectors.each_with_index do |v, i|
    w = width_list[i]
    if total_length + w > width && i < df.n_keys - 1
      row_ellipsis = i
      formats << '%3s'
      formats << format_for_column(df.vectors[-1], original, width_list[-1])
      break
    end
    formats << format_for_column(v, original, w)
    total_length += w
  end
  format_str = formats.join(' ')

  if row_ellipsis
    df = df[df.keys[0..row_ellipsis], df.keys[-1]]
    df = df.assign(df.keys[row_ellipsis] => ['...'] * df.size)
  end

  str = StringIO.new
  df.to_a.each do |row|
    str.puts format(format_str, *row).rstrip
  end
  str.string
end
200
+
201
# Returns the Kernel#format directive for one column of the text table.
#
# The index column and columns whose vector in `original` is numeric are
# right-aligned ("%<width>s"); all other columns are left-aligned
# ("%-<width>s").
def format_for_column(vector, original, width)
  right_aligned = vector.key == INDEX_KEY || original[vector.key].numeric?
  right_aligned ? "%#{width}s" : "%-#{width}s"
end
131
208
  end
132
209
  end
@@ -3,35 +3,94 @@
3
3
  module RedAmber
4
4
  # mix-in for the class DataFrame
5
5
  module DataFrameSelectable
6
- # select columns: [symbol] or [string]
7
- # select rows: [array of index], [range]
6
+ # select variables: [symbol] or [string]
7
+ # select observations: [array of index], [range]
8
8
  def [](*args)
9
+ args.flatten!
9
10
  raise DataFrameArgumentError, 'Empty dataframe' if empty?
10
- raise DataFrameArgumentError, 'Empty argument' if args.empty?
11
-
12
- if args.one?
13
- case args[0]
14
- when Vector
15
- return select_obs_by_boolean(Arrow::BooleanArray.new(args[0].data))
16
- when Arrow::BooleanArray
17
- return select_obs_by_boolean(args[0])
18
- when Array
19
- return select_obs_by_boolean(Arrow::BooleanArray.new(args[0]))
20
-
21
- # when Hash
22
- # specify conditions to select by a Hash
23
- end
11
+ return remove_all_values if args.empty? || args[0].nil?
12
+
13
+ vector = parse_to_vector(args)
14
+ if vector.boolean?
15
+ return filter_by_vector(vector.data) if vector.size == size
16
+
17
+ raise DataFrameArgumentError, "Size is not match in booleans: #{args}"
18
+ end
19
+ return take_by_array(vector) if vector.numeric?
20
+ return select_vars_by_keys(vector.to_a.map(&:to_sym)) if vector.string? || vector.type == :dictionary
21
+
22
+ raise DataFrameArgumentError, "Invalid argument: #{args}"
23
+ end
24
+
25
+ # slice and select some observations to create sub DataFrame
26
# Selects observations and returns them as a sub DataFrame.
#
# The selector comes either from the arguments or from the block's return
# value (the block is evaluated with instance_eval), never both.
# - empty selector or nil first element -> remove_all_values
# - booleans of the same size as self   -> filter_by_vector
# - numeric indices                     -> take_by_array
#
# Raises DataFrameArgumentError when both arguments and a block are given,
# when self is empty, when booleans differ in size from self, or when the
# selector is neither boolean nor numeric.
def slice(*args, &block)
  if block && !args.empty?
    raise DataFrameArgumentError, 'Must not specify both arguments and block.'
  end

  slicer = [block ? instance_eval(&block) : args].flatten

  raise DataFrameArgumentError, 'Empty dataframe' if empty?
  return remove_all_values if slicer.empty? || slicer.first.nil?

  vector = parse_to_vector(slicer)
  if vector.boolean?
    unless vector.size == size
      raise DataFrameArgumentError, "Size is not match in booleans: #{slicer}"
    end

    return filter_by_vector(vector.data)
  end
  raise DataFrameArgumentError, "Invalid argument #{slicer}" unless vector.numeric?

  take_by_array(vector)
end
48
+
49
+ # remove selected observations to create sub DataFrame
50
+ def remove(*args, &block)
51
+ remover = args
52
+ if block
53
+ raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
54
+
55
+ remover = instance_eval(&block)
56
+ end
57
+ remover = [remover].flatten
58
+
59
+ raise DataFrameArgumentError, 'Empty dataframe' if empty?
60
+ return self if remover.empty? || remover[0].nil?
61
+
62
+ vector = parse_to_vector(remover)
63
+ if vector.boolean?
64
+ return filter_by_vector(vector.primitive_invert.data) if vector.size == size
65
+
66
+ raise DataFrameArgumentError, "Size is not match in booleans: #{remover}"
67
+ end
68
+ if vector.numeric?
69
+ raise DataFrameArgumentError, "Index out of range: #{vector.min}" if vector.min <= -size - 1
70
+
71
+ normalized_indices = (vector < 0).if_else(vector + size, vector) # normalize index from tail
72
+ if normalized_indices.max >= size
73
+ raise DataFrameArgumentError, "Index out of range: #{normalized_indices.max}"
74
+ end
25
75
 
26
- return select_obs_by_boolean(args) if booleans?(args)
76
+ normalized_indices = normalized_indices.floor.to_a.map(&:to_i) # round to integer array
77
+ return remove_all_values if normalized_indices == indices
78
+ return self if normalized_indices.empty?
27
79
 
28
- # expand Range like [1..3, 4] to [1, 2, 3, 4]
29
- expanded = expand_range(args)
30
- return map_indices(*expanded) if integers?(expanded)
31
- return select_vars_by_keys(expanded.map(&:to_sym)) if sym_or_str?(expanded)
80
+ index_array = indices - normalized_indices
32
81
 
33
- raise DataFrameArgumentError, "Invalid argument #{args}"
82
+ datum = Arrow::Function.find(:take).execute([table, index_array])
83
+ return DataFrame.new(datum.value)
84
+ end
85
+
86
+ raise DataFrameArgumentError, "Invalid argument #{remover}"
87
+ end
88
+
89
# Returns a new DataFrame built from the result of Arrow's :drop_null
# function applied to the underlying table.
def remove_nil
  dropped = Arrow::Function.find(:drop_null).execute([table])
  DataFrame.new(dropped.value)
end
93
+ alias_method :drop_nil, :remove_nil
35
94
 
36
95
  # Select a variable by a key in String or Symbol
37
96
  def v(key)
@@ -43,24 +102,57 @@ module RedAmber
43
102
  variables[key.to_sym]
44
103
  end
45
104
 
46
- def head(n_rows = 5)
47
- raise DataFrameArgumentError, "Index is out of range #{n_rows}" if n_rows.negative?
105
+ def head(n_obs = 5)
106
+ raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?
48
107
 
49
- self[0...[n_rows, size].min]
108
+ self[0...[n_obs, size].min]
50
109
  end
51
110
 
52
- def tail(n_rows = 5)
53
- raise DataFrameArgumentError, "Index is out of range #{n_rows}" if n_rows.negative?
111
+ def tail(n_obs = 5)
112
+ raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?
54
113
 
55
- self[-[n_rows, size].min..]
114
+ self[-[n_obs, size].min..]
56
115
  end
57
116
 
58
- def first(n_rows = 1)
59
- head(n_rows)
117
+ def first(n_obs = 1)
118
+ head(n_obs)
60
119
  end
61
120
 
62
- def last(n_rows = 1)
63
- tail(n_rows)
121
+ def last(n_obs = 1)
122
+ tail(n_obs)
123
+ end
124
+
125
+ # Undocumented
126
+ # TODO: support for option {boundscheck: true}
127
# Takes observations by index and returns them via take_by_array.
#
# Arguments are flattened first. No indices -> remove_all_values.
# A single non-Numeric argument (for example a Vector) is used as is;
# anything that is not already a Vector is wrapped with Vector.new.
def take(*indices)
  flat = indices.flatten
  return remove_all_values if flat.empty?

  selector = flat.one? && !flat.first.is_a?(Numeric) ? flat.first : flat
  selector = Vector.new(selector) unless selector.is_a?(Vector)

  take_by_array(selector)
end
136
+
137
+ # Undocumented
138
+ # TODO: support for option {null_selection_behavior: :drop}
139
# Filters observations by booleans and returns the result of
# filter_by_vector.
#
# Arguments are flattened first. No arguments -> remove_all_values.
# The first element decides how the input is interpreted:
# - Vector              -> must be boolean; its #data is used
# - Arrow::BooleanArray -> used directly
# - anything else       -> all flattened elements must pass booleans?;
#                          they are wrapped in a new Arrow::BooleanArray
#
# Raises DataFrameArgumentError when the input is not boolean.
def filter(*booleans)
  flags = booleans.flatten
  return remove_all_values if flags.empty?

  first = flags.first
  if first.is_a?(Vector)
    raise DataFrameArgumentError, 'Argument is not a boolean.' unless first.boolean?

    filter_by_vector(first.data)
  elsif first.is_a?(Arrow::BooleanArray)
    filter_by_vector(first)
  else
    raise DataFrameArgumentError, 'Argument is not a boolean.' unless booleans?(flags)

    filter_by_vector(Arrow::BooleanArray.new(flags))
  end
end
65
157
 
66
158
  private
@@ -75,5 +167,32 @@ module RedAmber
75
167
  DataFrame.new(@table[keys])
76
168
  end
77
169
  end
170
+
171
+ # Accepts indices by numeric Vector
172
# Takes observations by a numeric Vector of indices and returns a new
# DataFrame from Arrow's :take function.
#
# Negative indices are counted from the tail (size is added to them).
# Raises DataFrameArgumentError when `indices` is not numeric or when any
# index falls outside -size..size-1.
def take_by_array(indices)
  unless indices.numeric?
    raise DataFrameArgumentError, "Indices must be a numeric Vector: #{indices}"
  end

  lowest = indices.min
  raise DataFrameArgumentError, "Index out of range: #{lowest}" if lowest <= -size - 1

  # normalize index from tail
  from_head = (indices < 0).if_else(indices + size, indices)
  highest = from_head.max
  raise DataFrameArgumentError, "Index out of range: #{highest}" if highest >= size

  # the builder is handed the normalized data to obtain an unsigned integer array
  positions = Arrow::UInt64ArrayBuilder.build(from_head.data)

  taken = Arrow::Function.find(:take).execute([table, positions])
  DataFrame.new(taken.value)
end
184
+
185
+ # Accepts booleans by Arrow::BooleanArray
186
# Filters the table with Arrow's :filter function and wraps the result in a
# new DataFrame. `boolean_array` must have the same length as self,
# otherwise DataFrameArgumentError is raised.
def filter_by_vector(boolean_array)
  unless boolean_array.length == size
    raise DataFrameArgumentError, 'Booleans must be same size as self.'
  end

  filtered = Arrow::Function.find(:filter).execute([table, boolean_array])
  DataFrame.new(filtered.value)
end
192
+
193
+ # return a DataFrame with same keys as self without values
194
# Filters with an all-false boolean array of the same size as self.
def remove_all_values
  all_false = Array.new(size, false)
  filter_by_vector(Arrow::BooleanArray.new(all_false))
end
78
197
  end
79
198
  end
@@ -129,5 +129,9 @@ module RedAmber
129
129
  arrays << Arrow::ChunkedArray.new([a])
130
130
  end
131
131
  end
132
+
133
# Returns the keys whose position holds a truthy value in `booleans`
# (looked up with booleans[i]).
def keys_by_booleans(booleans)
  chosen = keys.each_index.select { |i| booleans[i] }
  keys.values_at(*chosen)
end
132
136
  end
133
137
  end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
module RedAmber
  # Groups the observations of a DataFrame by one or more keys and offers
  # aggregation methods (count, sum, product, mean, min, max, stddev,
  # variance) that each return a DataFrame.
  class Group
    # @param dataframe [DataFrame] source; must respond to #table and #keys.
    # @param group_keys [Array<Symbol>] keys to group by (nested arrays are
    #   flattened).
    # @raise [GroupArgumentError] if no key is given or a key is not in
    #   dataframe.keys.
    def initialize(dataframe, *group_keys)
      @dataframe = dataframe
      @table = @dataframe.table
      @group_keys = group_keys.flatten

      raise GroupArgumentError, 'group_keys is empty.' if @group_keys.empty?

      d = @group_keys - @dataframe.keys
      raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty?

      @group = @table.group(*@group_keys)
    end

    # Define one public aggregation method per function name. Each takes
    # optional summary keys and delegates to #by.
    %i[count sum product mean min max stddev variance].each do |function|
      define_method(function) do |*summary_keys|
        by(function, summary_keys)
      end
    end

    # Shows the class, object id and a tally of each grouping vector.
    def inspect
      tallys = @dataframe.pick(@group_keys).vectors.each_with_object({}) do |v, h|
        h[v.key] = v.tally
      end
      "#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallys}>"
    end

    # Evaluates the block in the context of this Group.
    # A DataFrame result is returned as is; an Array of DataFrames is merged
    # left to right with #assign.
    # @raise [GroupArgumentError] if the block returns anything else.
    def summarize(&block)
      agg = instance_eval(&block)
      case agg
      when DataFrame
        agg
      when Array
        agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
      else
        raise GroupArgumentError, "Unknown argument: #{agg}"
      end
    end

    private

    # Runs aggregation `func` on the grouped table and returns a DataFrame
    # with the group keys first, followed by the aggregated columns.
    #
    # Fix: all group keys are moved to the front by name. The previous code
    # moved only the last result column, and the count collapse looked at
    # `values[1..]` / `keys[0..1]`, which assumed exactly one group key.
    # Results for a single group key are unchanged.
    def by(func, summary_keys)
      summary_keys = Array(summary_keys).flatten
      d = summary_keys - @dataframe.keys
      raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty?

      df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
      value_keys = df.keys - @group_keys
      df = df[@group_keys, value_keys]

      # if counts are the same (no nil included), aggregate count columns.
      if func == :count && df.to_h.values_at(*value_keys).uniq.size == 1
        df = df[@group_keys, value_keys.first].rename(value_keys.first, :count)
      end
      df
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
module RedAmber
  # mix-in for the class DataFrame
  #
  # All methods are private. Several of them call #size, which must be
  # provided by the including class.
  module Helper
    private

    # Plural suffix: 's' when num is greater than 1, otherwise ''.
    def pl(num)
      num > 1 ? 's' : ''
    end

    # True when any index lies outside -size..size-1.
    # An empty collection has nothing out of range and returns false
    # (previously this raised NoMethodError because max/min were nil).
    def out_of_range?(indices)
      highest = indices.max
      lowest = indices.min
      return false if highest.nil? || lowest.nil?

      highest >= size || lowest < -size
    end

    # True when every element is an Integer.
    def integers?(enum)
      enum.all?(Integer)
    end

    # True when every element is a Symbol or a String.
    def sym_or_str?(enum)
      enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
    end

    # True when every element is true, false or nil.
    def booleans?(enum)
      enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
    end

    # Builds a one-variable DataFrame from `key` and the vector's data.
    def create_dataframe_from_vector(key, vector)
      DataFrame.new(key => vector.data)
    end

    # Normalizes each argument with #normalize_element, concatenates the
    # results and wraps them in a Vector.
    def parse_to_vector(args)
      Vector.new(args.flat_map { |elem| normalize_element(elem) })
    end

    # Converts one selector element to an Array:
    # - scalars (Numeric, String, Symbol, true, false, nil) -> [elem]
    # - Range                                               -> see #normalize_range
    # - anything else                                       -> Array(elem)
    def normalize_element(elem)
      case elem
      when Numeric, String, Symbol, TrueClass, FalseClass, NilClass
        [elem]
      when Range
        normalize_range(elem)
      else
        Array(elem)
      end
    end

    # Expands a Range to an Array.
    # Ranges with an Integer end point, or with both ends nil, are treated as
    # index ranges over 0...size (negative values count from the tail) and
    # raise DataFrameArgumentError when an end point is outside -size..size-1.
    # Other ranges are expanded with Range#to_a.
    def normalize_range(range)
      last = range.end
      # compare the last included index, not the excluded end
      last -= 1 if range.exclude_end? && last.is_a?(Integer)
      both_end = [range.begin, last]

      return range.to_a unless both_end.any?(Integer) || both_end.all?(&:nil?)

      if both_end.any? { |e| e && (e >= size || e < -size) }
        raise DataFrameArgumentError, "Index out of range: #{range} for 0..#{size - 1}"
      end

      (0...size).to_a[range]
    end
  end
end