red_amber 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.rubocop.yml +24 -5
 - data/CHANGELOG.md +98 -13
 - data/Gemfile +1 -0
 - data/README.md +55 -6
 - data/doc/DataFrame.md +23 -9
 - data/doc/Vector.md +156 -24
 - data/lib/red-amber.rb +27 -0
 - data/lib/red_amber/data_frame.rb +39 -7
 - data/lib/red_amber/data_frame_displayable.rb +8 -8
 - data/lib/red_amber/data_frame_observation_operation.rb +0 -72
 - data/lib/red_amber/data_frame_selectable.rb +151 -32
 - data/lib/red_amber/data_frame_variable_operation.rb +4 -0
 - data/lib/red_amber/helper.rb +61 -0
 - data/lib/red_amber/vector.rb +42 -12
 - data/lib/red_amber/vector_functions.rb +25 -18
 - data/lib/red_amber/vector_selectable.rb +124 -0
 - data/lib/red_amber/{vector_compensable.rb → vector_updatable.rb} +52 -16
 - data/lib/red_amber/version.rb +1 -1
 - data/lib/red_amber.rb +1 -24
 - metadata +6 -4
 - data/lib/red_amber/data_frame_helper.rb +0 -64
 
| 
         @@ -3,35 +3,94 @@ 
     | 
|
| 
       3 
3 
     | 
    
         
             
            module RedAmber
         
     | 
| 
       4 
4 
     | 
    
         
             
              # mix-in for the class DataFrame
         
     | 
| 
       5 
5 
     | 
    
         
             
              module DataFrameSelectable
         
     | 
| 
       6 
     | 
    
         
            -
                # select  
     | 
| 
       7 
     | 
    
         
            -
                # select  
     | 
| 
      
 6 
     | 
    
         
            +
                # select variables: [symbol] or [string]
         
     | 
| 
      
 7 
     | 
    
         
            +
                # select observations: [array of index], [range]
         
     | 
| 
       8 
8 
     | 
    
         
             
                def [](*args)
         
     | 
| 
      
 9 
     | 
    
         
            +
                  args.flatten!
         
     | 
| 
       9 
10 
     | 
    
         
             
                  raise DataFrameArgumentError, 'Empty dataframe' if empty?
         
     | 
| 
       10 
     | 
    
         
            -
                   
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
                   
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
                     
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
                     
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
      
 11 
     | 
    
         
            +
                  return remove_all_values if args.empty? || args[0].nil?
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
                  vector = parse_to_vector(args)
         
     | 
| 
      
 14 
     | 
    
         
            +
                  if vector.boolean?
         
     | 
| 
      
 15 
     | 
    
         
            +
                    return filter_by_vector(vector.data) if vector.size == size
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                    raise DataFrameArgumentError, "Size is not match in booleans: #{args}"
         
     | 
| 
      
 18 
     | 
    
         
            +
                  end
         
     | 
| 
      
 19 
     | 
    
         
            +
                  return take_by_array(vector) if vector.numeric?
         
     | 
| 
      
 20 
     | 
    
         
            +
                  return select_vars_by_keys(vector.to_a.map(&:to_sym)) if vector.string? || vector.type == :dictionary
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
                  raise DataFrameArgumentError, "Invalid argument: #{args}"
         
     | 
| 
      
 23 
     | 
    
         
            +
                end
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                # slice and select some observations to create sub DataFrame
         
     | 
| 
      
 26 
     | 
    
         
            +
                def slice(*args, &block)
         
     | 
| 
      
 27 
     | 
    
         
            +
                  slicer = args
         
     | 
| 
      
 28 
     | 
    
         
            +
                  if block
         
     | 
| 
      
 29 
     | 
    
         
            +
                    raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
                    slicer = instance_eval(&block)
         
     | 
| 
      
 32 
     | 
    
         
            +
                  end
         
     | 
| 
      
 33 
     | 
    
         
            +
                  slicer = [slicer].flatten
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
                  raise DataFrameArgumentError, 'Empty dataframe' if empty?
         
     | 
| 
      
 36 
     | 
    
         
            +
                  return remove_all_values if slicer.empty? || slicer[0].nil?
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                  vector = parse_to_vector(slicer)
         
     | 
| 
      
 39 
     | 
    
         
            +
                  if vector.boolean?
         
     | 
| 
      
 40 
     | 
    
         
            +
                    return filter_by_vector(vector.data) if vector.size == size
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
                    raise DataFrameArgumentError, "Size is not match in booleans: #{slicer}"
         
     | 
| 
       24 
43 
     | 
    
         
             
                  end
         
     | 
| 
      
 44 
     | 
    
         
            +
                  return take_by_array(vector) if vector.numeric?
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
                  raise DataFrameArgumentError, "Invalid argument #{slicer}"
         
     | 
| 
      
 47 
     | 
    
         
            +
                end
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
                # remove selected observations to create sub DataFrame
         
     | 
| 
      
 50 
     | 
    
         
            +
                def remove(*args, &block)
         
     | 
| 
      
 51 
     | 
    
         
            +
                  remover = args
         
     | 
| 
      
 52 
     | 
    
         
            +
                  if block
         
     | 
| 
      
 53 
     | 
    
         
            +
                    raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
                    remover = instance_eval(&block)
         
     | 
| 
      
 56 
     | 
    
         
            +
                  end
         
     | 
| 
      
 57 
     | 
    
         
            +
                  remover = [remover].flatten
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
                  raise DataFrameArgumentError, 'Empty dataframe' if empty?
         
     | 
| 
      
 60 
     | 
    
         
            +
                  return self if remover.empty? || remover[0].nil?
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
                  vector = parse_to_vector(remover)
         
     | 
| 
      
 63 
     | 
    
         
            +
                  if vector.boolean?
         
     | 
| 
      
 64 
     | 
    
         
            +
                    return filter_by_vector(vector.primitive_invert.data) if vector.size == size
         
     | 
| 
      
 65 
     | 
    
         
            +
             
     | 
| 
      
 66 
     | 
    
         
            +
                    raise DataFrameArgumentError, "Size is not match in booleans: #{remover}"
         
     | 
| 
      
 67 
     | 
    
         
            +
                  end
         
     | 
| 
      
 68 
     | 
    
         
            +
                  if vector.numeric?
         
     | 
| 
      
 69 
     | 
    
         
            +
                    raise DataFrameArgumentError, "Index out of range: #{vector.min}" if vector.min <= -size - 1
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
                    normalized_indices = (vector < 0).if_else(vector + size, vector) # normalize index from tail
         
     | 
| 
      
 72 
     | 
    
         
            +
                    if normalized_indices.max >= size
         
     | 
| 
      
 73 
     | 
    
         
            +
                      raise DataFrameArgumentError, "Index out of range: #{normalized_indices.max}"
         
     | 
| 
      
 74 
     | 
    
         
            +
                    end
         
     | 
| 
       25 
75 
     | 
    
         | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
      
 76 
     | 
    
         
            +
                    normalized_indices = normalized_indices.floor.to_a.map(&:to_i) # round to integer array
         
     | 
| 
      
 77 
     | 
    
         
            +
                    return remove_all_values if normalized_indices == indices
         
     | 
| 
      
 78 
     | 
    
         
            +
                    return self if normalized_indices.empty?
         
     | 
| 
       27 
79 
     | 
    
         | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
                  expanded = expand_range(args)
         
     | 
| 
       30 
     | 
    
         
            -
                  return map_indices(*expanded) if integers?(expanded)
         
     | 
| 
       31 
     | 
    
         
            -
                  return select_vars_by_keys(expanded.map(&:to_sym)) if sym_or_str?(expanded)
         
     | 
| 
      
 80 
     | 
    
         
            +
                    index_array = indices - normalized_indices
         
     | 
| 
       32 
81 
     | 
    
         | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
      
 82 
     | 
    
         
            +
                    datum = Arrow::Function.find(:take).execute([table, index_array])
         
     | 
| 
      
 83 
     | 
    
         
            +
                    return DataFrame.new(datum.value)
         
     | 
| 
      
 84 
     | 
    
         
            +
                  end
         
     | 
| 
      
 85 
     | 
    
         
            +
             
     | 
| 
      
 86 
     | 
    
         
            +
                  raise DataFrameArgumentError, "Invalid argument #{remover}"
         
     | 
| 
      
 87 
     | 
    
         
            +
                end
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
      
 89 
     | 
    
         
            +
                def remove_nil
         
     | 
| 
      
 90 
     | 
    
         
            +
                  func = Arrow::Function.find(:drop_null)
         
     | 
| 
      
 91 
     | 
    
         
            +
                  DataFrame.new(func.execute([table]).value)
         
     | 
| 
       34 
92 
     | 
    
         
             
                end
         
     | 
| 
      
 93 
     | 
    
         
            +
                alias_method :drop_nil, :remove_nil
         
     | 
| 
       35 
94 
     | 
    
         | 
| 
       36 
95 
     | 
    
         
             
                # Select a variable by a key in String or Symbol
         
     | 
| 
       37 
96 
     | 
    
         
             
                def v(key)
         
     | 
| 
         @@ -43,24 +102,57 @@ module RedAmber 
     | 
|
| 
       43 
102 
     | 
    
         
             
                  variables[key.to_sym]
         
     | 
| 
       44 
103 
     | 
    
         
             
                end
         
     | 
| 
       45 
104 
     | 
    
         | 
| 
       46 
     | 
    
         
            -
                def head( 
     | 
| 
       47 
     | 
    
         
            -
                  raise DataFrameArgumentError, "Index is out of range #{ 
     | 
| 
      
 105 
     | 
    
         
            +
                def head(n_obs = 5)
         
     | 
| 
      
 106 
     | 
    
         
            +
                  raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?
         
     | 
| 
       48 
107 
     | 
    
         | 
| 
       49 
     | 
    
         
            -
                  self[0...[ 
     | 
| 
      
 108 
     | 
    
         
            +
                  self[0...[n_obs, size].min]
         
     | 
| 
       50 
109 
     | 
    
         
             
                end
         
     | 
| 
       51 
110 
     | 
    
         | 
| 
       52 
     | 
    
         
            -
                def tail( 
     | 
| 
       53 
     | 
    
         
            -
                  raise DataFrameArgumentError, "Index is out of range #{ 
     | 
| 
      
 111 
     | 
    
         
            +
                def tail(n_obs = 5)
         
     | 
| 
      
 112 
     | 
    
         
            +
                  raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?
         
     | 
| 
       54 
113 
     | 
    
         | 
| 
       55 
     | 
    
         
            -
                  self[-[ 
     | 
| 
      
 114 
     | 
    
         
            +
                  self[-[n_obs, size].min..]
         
     | 
| 
       56 
115 
     | 
    
         
             
                end
         
     | 
| 
       57 
116 
     | 
    
         | 
| 
       58 
     | 
    
         
            -
                def first( 
     | 
| 
       59 
     | 
    
         
            -
                  head( 
     | 
| 
      
 117 
     | 
    
         
            +
                def first(n_obs = 1)
         
     | 
| 
      
 118 
     | 
    
         
            +
                  head(n_obs)
         
     | 
| 
       60 
119 
     | 
    
         
             
                end
         
     | 
| 
       61 
120 
     | 
    
         | 
| 
       62 
     | 
    
         
            -
                def last( 
     | 
| 
       63 
     | 
    
         
            -
                  tail( 
     | 
| 
      
 121 
     | 
    
         
            +
                def last(n_obs = 1)
         
     | 
| 
      
 122 
     | 
    
         
            +
                  tail(n_obs)
         
     | 
| 
      
 123 
     | 
    
         
            +
                end
         
     | 
| 
      
 124 
     | 
    
         
            +
             
     | 
| 
      
 125 
     | 
    
         
            +
                # Undocumented
         
     | 
| 
      
 126 
     | 
    
         
            +
                # TODO: support for option {boundscheck: true}
         
     | 
| 
      
 127 
     | 
    
         
            +
                def take(*indices)
         
     | 
| 
      
 128 
     | 
    
         
            +
                  indices.flatten!
         
     | 
| 
      
 129 
     | 
    
         
            +
                  return remove_all_values if indices.empty?
         
     | 
| 
      
 130 
     | 
    
         
            +
             
     | 
| 
      
 131 
     | 
    
         
            +
                  indices = indices[0] if indices.one? && !indices[0].is_a?(Numeric)
         
     | 
| 
      
 132 
     | 
    
         
            +
                  indices = Vector.new(indices) unless indices.is_a?(Vector)
         
     | 
| 
      
 133 
     | 
    
         
            +
             
     | 
| 
      
 134 
     | 
    
         
            +
                  take_by_array(indices)
         
     | 
| 
      
 135 
     | 
    
         
            +
                end
         
     | 
| 
      
 136 
     | 
    
         
            +
             
     | 
| 
      
 137 
     | 
    
         
            +
                # Undocumented
         
     | 
| 
      
 138 
     | 
    
         
            +
                # TODO: support for option {null_selection_behavior: :drop}
         
     | 
| 
      
 139 
     | 
    
         
            +
                def filter(*booleans)
         
     | 
| 
      
 140 
     | 
    
         
            +
                  booleans.flatten!
         
     | 
| 
      
 141 
     | 
    
         
            +
                  return remove_all_values if booleans.empty?
         
     | 
| 
      
 142 
     | 
    
         
            +
             
     | 
| 
      
 143 
     | 
    
         
            +
                  b = booleans[0]
         
     | 
| 
      
 144 
     | 
    
         
            +
                  case b
         
     | 
| 
      
 145 
     | 
    
         
            +
                  when Vector
         
     | 
| 
      
 146 
     | 
    
         
            +
                    raise DataFrameArgumentError, 'Argument is not a boolean.' unless b.boolean?
         
     | 
| 
      
 147 
     | 
    
         
            +
             
     | 
| 
      
 148 
     | 
    
         
            +
                    filter_by_vector(b.data)
         
     | 
| 
      
 149 
     | 
    
         
            +
                  when Arrow::BooleanArray
         
     | 
| 
      
 150 
     | 
    
         
            +
                    filter_by_vector(b)
         
     | 
| 
      
 151 
     | 
    
         
            +
                  else
         
     | 
| 
      
 152 
     | 
    
         
            +
                    raise DataFrameArgumentError, 'Argument is not a boolean.' unless booleans?(booleans)
         
     | 
| 
      
 153 
     | 
    
         
            +
             
     | 
| 
      
 154 
     | 
    
         
            +
                    filter_by_vector(Arrow::BooleanArray.new(booleans))
         
     | 
| 
      
 155 
     | 
    
         
            +
                  end
         
     | 
| 
       64 
156 
     | 
    
         
             
                end
         
     | 
| 
       65 
157 
     | 
    
         | 
| 
       66 
158 
     | 
    
         
             
                private
         
     | 
| 
         @@ -75,5 +167,32 @@ module RedAmber 
     | 
|
| 
       75 
167 
     | 
    
         
             
                    DataFrame.new(@table[keys])
         
     | 
| 
       76 
168 
     | 
    
         
             
                  end
         
     | 
| 
       77 
169 
     | 
    
         
             
                end
         
     | 
| 
      
 170 
     | 
    
         
            +
             
     | 
| 
      
 171 
     | 
    
         
            +
                # Accepts indices by numeric Vector
         
     | 
| 
      
 172 
     | 
    
         
            +
                def take_by_array(indices)
         
     | 
| 
      
 173 
     | 
    
         
            +
                  raise DataFrameArgumentError, "Indices must be a numeric Vector: #{indices}" unless indices.numeric?
         
     | 
| 
      
 174 
     | 
    
         
            +
                  raise DataFrameArgumentError, "Index out of range: #{indices.min}" if indices.min <= -size - 1
         
     | 
| 
      
 175 
     | 
    
         
            +
             
     | 
| 
      
 176 
     | 
    
         
            +
                  normalized_indices = (indices < 0).if_else(indices + size, indices) # normalize index from tail
         
     | 
| 
      
 177 
     | 
    
         
            +
                  raise DataFrameArgumentError, "Index out of range: #{normalized_indices.max}" if normalized_indices.max >= size
         
     | 
| 
      
 178 
     | 
    
         
            +
             
     | 
| 
      
 179 
     | 
    
         
            +
                  index_array = Arrow::UInt64ArrayBuilder.build(normalized_indices.data) # round to integer array
         
     | 
| 
      
 180 
     | 
    
         
            +
             
     | 
| 
      
 181 
     | 
    
         
            +
                  datum = Arrow::Function.find(:take).execute([table, index_array])
         
     | 
| 
      
 182 
     | 
    
         
            +
                  DataFrame.new(datum.value)
         
     | 
| 
      
 183 
     | 
    
         
            +
                end
         
     | 
| 
      
 184 
     | 
    
         
            +
             
     | 
| 
      
 185 
     | 
    
         
            +
                # Accepts booleans by Arrow::BooleanArray
         
     | 
| 
      
 186 
     | 
    
         
            +
                def filter_by_vector(boolean_array)
         
     | 
| 
      
 187 
     | 
    
         
            +
                  raise DataFrameArgumentError, 'Booleans must be same size as self.' unless boolean_array.length == size
         
     | 
| 
      
 188 
     | 
    
         
            +
             
     | 
| 
      
 189 
     | 
    
         
            +
                  datum = Arrow::Function.find(:filter).execute([table, boolean_array])
         
     | 
| 
      
 190 
     | 
    
         
            +
                  DataFrame.new(datum.value)
         
     | 
| 
      
 191 
     | 
    
         
            +
                end
         
     | 
| 
      
 192 
     | 
    
         
            +
             
     | 
| 
      
 193 
     | 
    
         
            +
                # return a DataFrame with same keys as self without values
         
     | 
| 
      
 194 
     | 
    
         
            +
                def remove_all_values
         
     | 
| 
      
 195 
     | 
    
         
            +
                  filter_by_vector(Arrow::BooleanArray.new([false] * size))
         
     | 
| 
      
 196 
     | 
    
         
            +
                end
         
     | 
| 
       78 
197 
     | 
    
         
             
              end
         
     | 
| 
       79 
198 
     | 
    
         
             
            end
         
     | 
| 
         @@ -0,0 +1,61 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module RedAmber
         
     | 
| 
      
 4 
     | 
    
         
            +
              # mix-in for the class DataFrame
         
     | 
| 
      
 5 
     | 
    
         
            +
              module Helper
         
     | 
| 
      
 6 
     | 
    
         
            +
                private
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
                def pl(num)
         
     | 
| 
      
 9 
     | 
    
         
            +
                  num > 1 ? 's' : ''
         
     | 
| 
      
 10 
     | 
    
         
            +
                end
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
                def out_of_range?(indeces)
         
     | 
| 
      
 13 
     | 
    
         
            +
                  indeces.max >= size || indeces.min < -size
         
     | 
| 
      
 14 
     | 
    
         
            +
                end
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
                def integers?(enum)
         
     | 
| 
      
 17 
     | 
    
         
            +
                  enum.all?(Integer)
         
     | 
| 
      
 18 
     | 
    
         
            +
                end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
                def sym_or_str?(enum)
         
     | 
| 
      
 21 
     | 
    
         
            +
                  enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
         
     | 
| 
      
 22 
     | 
    
         
            +
                end
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
                def booleans?(enum)
         
     | 
| 
      
 25 
     | 
    
         
            +
                  enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
         
     | 
| 
      
 26 
     | 
    
         
            +
                end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
                def create_dataframe_from_vector(key, vector)
         
     | 
| 
      
 29 
     | 
    
         
            +
                  DataFrame.new(key => vector.data)
         
     | 
| 
      
 30 
     | 
    
         
            +
                end
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
                def parse_to_vector(args)
         
     | 
| 
      
 33 
     | 
    
         
            +
                  a = args.reduce([]) do |accum, elem|
         
     | 
| 
      
 34 
     | 
    
         
            +
                    accum.concat(normalize_element(elem))
         
     | 
| 
      
 35 
     | 
    
         
            +
                  end
         
     | 
| 
      
 36 
     | 
    
         
            +
                  Vector.new(a)
         
     | 
| 
      
 37 
     | 
    
         
            +
                end
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
                def normalize_element(elem)
         
     | 
| 
      
 40 
     | 
    
         
            +
                  case elem
         
     | 
| 
      
 41 
     | 
    
         
            +
                  when Numeric, String, Symbol, TrueClass, FalseClass, NilClass
         
     | 
| 
      
 42 
     | 
    
         
            +
                    [elem]
         
     | 
| 
      
 43 
     | 
    
         
            +
                  when Range
         
     | 
| 
      
 44 
     | 
    
         
            +
                    both_end = [elem.begin, elem.end]
         
     | 
| 
      
 45 
     | 
    
         
            +
                    both_end[1] -= 1 if elem.exclude_end? && elem.end.is_a?(Integer)
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
                    if both_end.any?(Integer) || both_end.all?(&:nil?)
         
     | 
| 
      
 48 
     | 
    
         
            +
                      if both_end.any? { |e| e&.>=(size) || e&.<(-size) }
         
     | 
| 
      
 49 
     | 
    
         
            +
                        raise DataFrameArgumentError, "Index out of range: #{elem} for 0..#{size - 1}"
         
     | 
| 
      
 50 
     | 
    
         
            +
                      end
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                      (0...size).to_a[elem]
         
     | 
| 
      
 53 
     | 
    
         
            +
                    else
         
     | 
| 
      
 54 
     | 
    
         
            +
                      elem.to_a
         
     | 
| 
      
 55 
     | 
    
         
            +
                    end
         
     | 
| 
      
 56 
     | 
    
         
            +
                  else
         
     | 
| 
      
 57 
     | 
    
         
            +
                    Array(elem)
         
     | 
| 
      
 58 
     | 
    
         
            +
                  end
         
     | 
| 
      
 59 
     | 
    
         
            +
                end
         
     | 
| 
      
 60 
     | 
    
         
            +
              end
         
     | 
| 
      
 61 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/red_amber/vector.rb
    CHANGED
    
    | 
         @@ -1,25 +1,37 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            module RedAmber
         
     | 
| 
       4 
     | 
    
         
            -
              #  
     | 
| 
      
 4 
     | 
    
         
            +
              # Values in variable (columnar) data object
         
     | 
| 
       5 
5 
     | 
    
         
             
              #   @data : holds Arrow::ChunkedArray
         
     | 
| 
       6 
6 
     | 
    
         
             
              class Vector
         
     | 
| 
       7 
7 
     | 
    
         
             
                # mix-in
         
     | 
| 
       8 
     | 
    
         
            -
                include VectorCompensable
         
     | 
| 
       9 
8 
     | 
    
         
             
                include VectorFunctions
         
     | 
| 
      
 9 
     | 
    
         
            +
                include VectorUpdatable
         
     | 
| 
      
 10 
     | 
    
         
            +
                include VectorSelectable
         
     | 
| 
      
 11 
     | 
    
         
            +
                include Helper
         
     | 
| 
       10 
12 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
                 
     | 
| 
       12 
     | 
    
         
            -
                def initialize(array)
         
     | 
| 
      
 13 
     | 
    
         
            +
                def initialize(*array)
         
     | 
| 
       13 
14 
     | 
    
         
             
                  @key = nil # default is 'headless'
         
     | 
| 
       14 
     | 
    
         
            -
                   
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
                    @data = array.data
         
     | 
| 
       17 
     | 
    
         
            -
                  when Arrow::Array, Arrow::ChunkedArray
         
     | 
| 
       18 
     | 
    
         
            -
                    @data = array
         
     | 
| 
       19 
     | 
    
         
            -
                  when Array
         
     | 
| 
       20 
     | 
    
         
            -
                    @data = Arrow::Array.new(array)
         
     | 
| 
      
 15 
     | 
    
         
            +
                  if array.empty? || array[0].nil?
         
     | 
| 
      
 16 
     | 
    
         
            +
                    Vector.new([])
         
     | 
| 
       21 
17 
     | 
    
         
             
                  else
         
     | 
| 
       22 
     | 
    
         
            -
                     
     | 
| 
      
 18 
     | 
    
         
            +
                    array.flatten!
         
     | 
| 
      
 19 
     | 
    
         
            +
                    case array[0]
         
     | 
| 
      
 20 
     | 
    
         
            +
                    when Vector
         
     | 
| 
      
 21 
     | 
    
         
            +
                      @data = array[0].data
         
     | 
| 
      
 22 
     | 
    
         
            +
                      return
         
     | 
| 
      
 23 
     | 
    
         
            +
                    when Arrow::Array, Arrow::ChunkedArray
         
     | 
| 
      
 24 
     | 
    
         
            +
                      @data = array[0]
         
     | 
| 
      
 25 
     | 
    
         
            +
                      return
         
     | 
| 
      
 26 
     | 
    
         
            +
                    when Range
         
     | 
| 
      
 27 
     | 
    
         
            +
                      @data = Arrow::Array.new(Array(array[0]))
         
     | 
| 
      
 28 
     | 
    
         
            +
                      return
         
     | 
| 
      
 29 
     | 
    
         
            +
                    end
         
     | 
| 
      
 30 
     | 
    
         
            +
                    begin
         
     | 
| 
      
 31 
     | 
    
         
            +
                      @data = Arrow::Array.new(Array(array))
         
     | 
| 
      
 32 
     | 
    
         
            +
                    rescue Error
         
     | 
| 
      
 33 
     | 
    
         
            +
                      raise VectorArgumentError, "Invalid argument: #{array}"
         
     | 
| 
      
 34 
     | 
    
         
            +
                    end
         
     | 
| 
       23 
35 
     | 
    
         
             
                  end
         
     | 
| 
       24 
36 
     | 
    
         
             
                end
         
     | 
| 
       25 
37 
     | 
    
         | 
| 
         @@ -52,6 +64,16 @@ module RedAmber 
     | 
|
| 
       52 
64 
     | 
    
         
             
                alias_method :to_a, :values
         
     | 
| 
       53 
65 
     | 
    
         
             
                alias_method :entries, :values
         
     | 
| 
       54 
66 
     | 
    
         | 
| 
      
 67 
     | 
    
         
            +
                def indices
         
     | 
| 
      
 68 
     | 
    
         
            +
                  (0...size).to_a
         
     | 
| 
      
 69 
     | 
    
         
            +
                end
         
     | 
| 
      
 70 
     | 
    
         
            +
                alias_method :indexes, :indices
         
     | 
| 
      
 71 
     | 
    
         
            +
                alias_method :indeces, :indices
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
                def to_ary
         
     | 
| 
      
 74 
     | 
    
         
            +
                  to_a
         
     | 
| 
      
 75 
     | 
    
         
            +
                end
         
     | 
| 
      
 76 
     | 
    
         
            +
             
     | 
| 
       55 
77 
     | 
    
         
             
                def size
         
     | 
| 
       56 
78 
     | 
    
         
             
                  # only defined :length in Arrow?
         
     | 
| 
       57 
79 
     | 
    
         
             
                  @data.length
         
     | 
| 
         @@ -60,6 +82,10 @@ module RedAmber 
     | 
|
| 
       60 
82 
     | 
    
         
             
                alias_method :n_rows, :size
         
     | 
| 
       61 
83 
     | 
    
         
             
                alias_method :nrow, :size
         
     | 
| 
       62 
84 
     | 
    
         | 
| 
      
 85 
     | 
    
         
            +
                def empty?
         
     | 
| 
      
 86 
     | 
    
         
            +
                  size.zero?
         
     | 
| 
      
 87 
     | 
    
         
            +
                end
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
       63 
89 
     | 
    
         
             
                def type
         
     | 
| 
       64 
90 
     | 
    
         
             
                  @data.value_type.nick.to_sym
         
     | 
| 
       65 
91 
     | 
    
         
             
                end
         
     | 
| 
         @@ -124,5 +150,9 @@ module RedAmber 
     | 
|
| 
       124 
150 
     | 
    
         
             
                def n_nans
         
     | 
| 
       125 
151 
     | 
    
         
             
                  numeric? ? is_nan.to_a.count(true) : 0
         
     | 
| 
       126 
152 
     | 
    
         
             
                end
         
     | 
| 
      
 153 
     | 
    
         
            +
             
     | 
| 
      
 154 
     | 
    
         
            +
                def has_nil?
         
     | 
| 
      
 155 
     | 
    
         
            +
                  is_nil.any
         
     | 
| 
      
 156 
     | 
    
         
            +
                end
         
     | 
| 
       127 
157 
     | 
    
         
             
              end
         
     | 
| 
       128 
158 
     | 
    
         
             
            end
         
     | 
| 
         @@ -16,11 +16,13 @@ module RedAmber 
     | 
|
| 
       16 
16 
     | 
    
         
             
                unary_aggregations.each do |function|
         
     | 
| 
       17 
17 
     | 
    
         
             
                  define_method(function) do |opts: nil|
         
     | 
| 
       18 
18 
     | 
    
         
             
                    datum = exec_func_unary(function, options: opts)
         
     | 
| 
       19 
     | 
    
         
            -
                     
     | 
| 
      
 19 
     | 
    
         
            +
                    get_scalar(datum)
         
     | 
| 
       20 
20 
     | 
    
         
             
                  end
         
     | 
| 
       21 
21 
     | 
    
         
             
                end
         
     | 
| 
       22 
22 
     | 
    
         
             
                alias_method :median, :approximate_median
         
     | 
| 
       23 
23 
     | 
    
         
             
                alias_method :count_uniq, :count_distinct
         
     | 
| 
      
 24 
     | 
    
         
            +
                alias_method :all?, :all
         
     | 
| 
      
 25 
     | 
    
         
            +
                alias_method :any?, :any
         
     | 
| 
       24 
26 
     | 
    
         | 
| 
       25 
27 
     | 
    
         
             
                def unbiased_variance
         
     | 
| 
       26 
28 
     | 
    
         
             
                  variance(opts: { ddof: 1 })
         
     | 
| 
         @@ -47,7 +49,7 @@ module RedAmber 
     | 
|
| 
       47 
49 
     | 
    
         
             
                unary_element_wise.each do |function|
         
     | 
| 
       48 
50 
     | 
    
         
             
                  define_method(function) do |opts: nil|
         
     | 
| 
       49 
51 
     | 
    
         
             
                    datum = exec_func_unary(function, options: opts)
         
     | 
| 
       50 
     | 
    
         
            -
                     
     | 
| 
      
 52 
     | 
    
         
            +
                    Vector.new(datum.value)
         
     | 
| 
       51 
53 
     | 
    
         
             
                  end
         
     | 
| 
       52 
54 
     | 
    
         
             
                end
         
     | 
| 
       53 
55 
     | 
    
         
             
                alias_method :is_nil, :is_null
         
     | 
| 
         @@ -72,12 +74,12 @@ module RedAmber 
     | 
|
| 
       72 
74 
     | 
    
         
             
                unary_element_wise_op.each do |function, operator|
         
     | 
| 
       73 
75 
     | 
    
         
             
                  define_method(function) do |opts: nil|
         
     | 
| 
       74 
76 
     | 
    
         
             
                    datum = exec_func_unary(function, options: opts)
         
     | 
| 
       75 
     | 
    
         
            -
                     
     | 
| 
      
 77 
     | 
    
         
            +
                    Vector.new(datum.value)
         
     | 
| 
       76 
78 
     | 
    
         
             
                  end
         
     | 
| 
       77 
79 
     | 
    
         | 
| 
       78 
80 
     | 
    
         
             
                  define_method(operator) do |opts: nil|
         
     | 
| 
       79 
81 
     | 
    
         
             
                    datum = exec_func_unary(function, options: opts)
         
     | 
| 
       80 
     | 
    
         
            -
                     
     | 
| 
      
 82 
     | 
    
         
            +
                    Vector.new(datum.value)
         
     | 
| 
       81 
83 
     | 
    
         
             
                  end
         
     | 
| 
       82 
84 
     | 
    
         
             
                end
         
     | 
| 
       83 
85 
     | 
    
         
             
                alias_method :not, :invert
         
     | 
| 
         @@ -95,7 +97,7 @@ module RedAmber 
     | 
|
| 
       95 
97 
     | 
    
         
             
                binary_element_wise.each do |function|
         
     | 
| 
       96 
98 
     | 
    
         
             
                  define_method(function) do |other, opts: nil|
         
     | 
| 
       97 
99 
     | 
    
         
             
                    datum = exec_func_binary(function, other, options: opts)
         
     | 
| 
       98 
     | 
    
         
            -
                     
     | 
| 
      
 100 
     | 
    
         
            +
                    Vector.new(datum.value)
         
     | 
| 
       99 
101 
     | 
    
         
             
                  end
         
     | 
| 
       100 
102 
     | 
    
         
             
                end
         
     | 
| 
       101 
103 
     | 
    
         | 
| 
         @@ -111,7 +113,7 @@ module RedAmber 
     | 
|
| 
       111 
113 
     | 
    
         
             
                logical_binary_element_wise.each do |method, function|
         
     | 
| 
       112 
114 
     | 
    
         
             
                  define_method(method) do |other, opts: nil|
         
     | 
| 
       113 
115 
     | 
    
         
             
                    datum = exec_func_binary(function, other, options: opts)
         
     | 
| 
       114 
     | 
    
         
            -
                     
     | 
| 
      
 116 
     | 
    
         
            +
                    Vector.new(datum.value)
         
     | 
| 
       115 
117 
     | 
    
         
             
                  end
         
     | 
| 
       116 
118 
     | 
    
         
             
                end
         
     | 
| 
       117 
119 
     | 
    
         | 
| 
         @@ -144,12 +146,12 @@ module RedAmber 
     | 
|
| 
       144 
146 
     | 
    
         
             
                binary_element_wise_op.each do |function, operator|
         
     | 
| 
       145 
147 
     | 
    
         
             
                  define_method(function) do |other, opts: nil|
         
     | 
| 
       146 
148 
     | 
    
         
             
                    datum = exec_func_binary(function, other, options: opts)
         
     | 
| 
       147 
     | 
    
         
            -
                     
     | 
| 
      
 149 
     | 
    
         
            +
                    Vector.new(datum.value)
         
     | 
| 
       148 
150 
     | 
    
         
             
                  end
         
     | 
| 
       149 
151 
     | 
    
         | 
| 
       150 
152 
     | 
    
         
             
                  define_method(operator) do |other, opts: nil|
         
     | 
| 
       151 
153 
     | 
    
         
             
                    datum = exec_func_binary(function, other, options: opts)
         
     | 
| 
       152 
     | 
    
         
            -
                     
     | 
| 
      
 154 
     | 
    
         
            +
                    Vector.new(datum.value)
         
     | 
| 
       153 
155 
     | 
    
         
             
                  end
         
     | 
| 
       154 
156 
     | 
    
         
             
                end
         
     | 
| 
       155 
157 
     | 
    
         
             
                alias_method :eq, :equal
         
     | 
| 
         @@ -159,8 +161,17 @@ module RedAmber 
     | 
|
| 
       159 
161 
     | 
    
         
             
                alias_method :lt, :less
         
     | 
| 
       160 
162 
     | 
    
         
             
                alias_method :ne, :not_equal
         
     | 
| 
       161 
163 
     | 
    
         | 
| 
      
 164 
     | 
    
         
            +
                def coerce(other)
         
     | 
| 
      
 165 
     | 
    
         
            +
                  case other
         
     | 
| 
      
 166 
     | 
    
         
            +
                  when Vector, Array, Arrow::Array
         
     | 
| 
      
 167 
     | 
    
         
            +
                    raise VectorArgumentError, "Size unmatch: #{size} != #{other.length}" unless size == other.length
         
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
      
 169 
     | 
    
         
            +
                    [Vector.new(Array(other)), self]
         
     | 
| 
      
 170 
     | 
    
         
            +
                  end
         
     | 
| 
      
 171 
     | 
    
         
            +
                  [Vector.new(Array(other) * size), self]
         
     | 
| 
      
 172 
     | 
    
         
            +
                end
         
     | 
| 
      
 173 
     | 
    
         
            +
             
     | 
| 
       162 
174 
     | 
    
         
             
                # (array functions)
         
     | 
| 
       163 
     | 
    
         
            -
                # array_filter, array_take
         
     | 
| 
       164 
175 
     | 
    
         
             
                # dictionary_encode,
         
     | 
| 
       165 
176 
     | 
    
         
             
                # partition_nth_indices,
         
     | 
| 
       166 
177 
     | 
    
         
             
                # quarter, quarters_between,
         
     | 
| 
         @@ -192,17 +203,17 @@ module RedAmber 
     | 
|
| 
       192 
203 
     | 
    
         
             
                # strptime, subsecond, us_week, week, weeks_between, year, year_month_day, years_between
         
     | 
| 
       193 
204 
     | 
    
         | 
| 
       194 
205 
     | 
    
         
             
                # (onditional)
         
     | 
| 
       195 
     | 
    
         
            -
                # case_when, cast, 
     | 
| 
      
 206 
     | 
    
         
            +
                # case_when, cast,
         
     | 
| 
       196 
207 
     | 
    
         | 
| 
       197 
208 
     | 
    
         
             
                # (indices)
         
     | 
| 
       198 
209 
     | 
    
         
             
                # choose, index_in, index_in_meta_binary, indices_nonzero
         
     | 
| 
       199 
210 
     | 
    
         | 
| 
       200 
211 
     | 
    
         
             
                # (others)
         
     | 
| 
       201 
     | 
    
         
            -
                # coalesce, 
     | 
| 
       202 
     | 
    
         
            -
                #  
     | 
| 
      
 212 
     | 
    
         
            +
                # coalesce,
         
     | 
| 
      
 213 
     | 
    
         
            +
                # is_in_meta_binary,
         
     | 
| 
       203 
214 
     | 
    
         
             
                # list_element, list_flatten, list_parent_indices, list_value_length, make_struct,
         
     | 
| 
       204 
215 
     | 
    
         
             
                # max_element_wise, min_element_wise, random, select_k_unstable,
         
     | 
| 
       205 
     | 
    
         
            -
                #  
     | 
| 
      
 216 
     | 
    
         
            +
                # struct_field,
         
     | 
| 
       206 
217 
     | 
    
         | 
| 
       207 
218 
     | 
    
         
             
                private # =======
         
     | 
| 
       208 
219 
     | 
    
         | 
| 
         @@ -221,7 +232,7 @@ module RedAmber 
     | 
|
| 
       221 
232 
     | 
    
         
             
                  end
         
     | 
| 
       222 
233 
     | 
    
         
             
                end
         
     | 
| 
       223 
234 
     | 
    
         | 
| 
       224 
     | 
    
         
            -
                def  
     | 
| 
      
 235 
     | 
    
         
            +
                def get_scalar(datum)
         
     | 
| 
       225 
236 
     | 
    
         
             
                  output = datum.value
         
     | 
| 
       226 
237 
     | 
    
         
             
                  case output
         
     | 
| 
       227 
238 
     | 
    
         
             
                  when Arrow::StringScalar then output.to_s
         
     | 
| 
         @@ -232,10 +243,6 @@ module RedAmber 
     | 
|
| 
       232 
243 
     | 
    
         
             
                  end
         
     | 
| 
       233 
244 
     | 
    
         
             
                end
         
     | 
| 
       234 
245 
     | 
    
         | 
| 
       235 
     | 
    
         
            -
                def take_out_element_wise(datum)
         
     | 
| 
       236 
     | 
    
         
            -
                  Vector.new(datum.value)
         
     | 
| 
       237 
     | 
    
         
            -
                end
         
     | 
| 
       238 
     | 
    
         
            -
             
     | 
| 
       239 
246 
     | 
    
         
             
                module_function # ======
         
     | 
| 
       240 
247 
     | 
    
         | 
| 
       241 
248 
     | 
    
         
             
                def find(function_name)
         
     | 
| 
         @@ -0,0 +1,124 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
         
     | 
| 
      
 4 
     | 
    
         
            +
            # reference: https://arrow.apache.org/docs/cpp/compute.html
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            module RedAmber
         
     | 
| 
      
 7 
     | 
    
         
            +
              # mix-ins for class Vector
         
     | 
| 
      
 8 
     | 
    
         
            +
              # Functions to select some data.
         
     | 
| 
      
 9 
     | 
    
         
            +
              module VectorSelectable
         
     | 
| 
      
 10 
     | 
    
         
            +
                def drop_nil
         
     | 
| 
      
 11 
     | 
    
         
            +
                  datum = find(:drop_null).execute([data])
         
     | 
| 
      
 12 
     | 
    
         
            +
                  Vector.new(datum.value)
         
     | 
| 
      
 13 
     | 
    
         
            +
                end
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
                # vector calculation version of selection by indices
         
     | 
| 
      
 16 
     | 
    
         
            +
                # TODO: support for option {boundscheck: true}
         
     | 
| 
      
 17 
     | 
    
         
            +
                def take(*indices)
         
     | 
| 
      
 18 
     | 
    
         
            +
                  indices.flatten!
         
     | 
| 
      
 19 
     | 
    
         
            +
                  return Vector.new([]) if indices.empty?
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                  indices = indices[0] if indices.one? && !indices[0].is_a?(Numeric)
         
     | 
| 
      
 22 
     | 
    
         
            +
                  indices = Vector.new(indices) unless indices.is_a?(Vector)
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
                  take_by_vector(indices) # returns sub Vector
         
     | 
| 
      
 25 
     | 
    
         
            +
                end
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
                # TODO: support for option {null_selection_behavior: :drop}
         
     | 
| 
      
 28 
     | 
    
         
            +
                def filter(*booleans)
         
     | 
| 
      
 29 
     | 
    
         
            +
                  booleans.flatten!
         
     | 
| 
      
 30 
     | 
    
         
            +
                  return Vector.new([]) if booleans.empty?
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
                  b = booleans[0]
         
     | 
| 
      
 33 
     | 
    
         
            +
                  boolean_array =
         
     | 
| 
      
 34 
     | 
    
         
            +
                    case b
         
     | 
| 
      
 35 
     | 
    
         
            +
                    when Vector
         
     | 
| 
      
 36 
     | 
    
         
            +
                      raise VectorTypeError, 'Argument is not a boolean.' unless b.boolean?
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                      b.data
         
     | 
| 
      
 39 
     | 
    
         
            +
                    when Arrow::BooleanArray
         
     | 
| 
      
 40 
     | 
    
         
            +
                      b
         
     | 
| 
      
 41 
     | 
    
         
            +
                    else
         
     | 
| 
      
 42 
     | 
    
         
            +
                      raise VectorTypeError, 'Argument is not a boolean.' unless booleans?(booleans)
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
                      Arrow::BooleanArray.new(booleans)
         
     | 
| 
      
 45 
     | 
    
         
            +
                    end
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
                  filter_by_array(boolean_array) # returns sub Vector
         
     | 
| 
      
 48 
     | 
    
         
            +
                end
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
                #   @param indices
         
     | 
| 
      
 51 
     | 
    
         
            +
                #   @param booleans
         
     | 
| 
      
 52 
     | 
    
         
            +
                def [](*args)
         
     | 
| 
      
 53 
     | 
    
         
            +
                  args.flatten!
         
     | 
| 
      
 54 
     | 
    
         
            +
                  return Vector.new([]) if args.empty?
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                  arg = args[0]
         
     | 
| 
      
 57 
     | 
    
         
            +
                  case arg
         
     | 
| 
      
 58 
     | 
    
         
            +
                  when Vector
         
     | 
| 
      
 59 
     | 
    
         
            +
                    return take_by_vector(arg) if arg.numeric?
         
     | 
| 
      
 60 
     | 
    
         
            +
                    return filter_by_array(arg.data) if arg.boolean?
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
                    raise VectorTypeError, "Argument must be numeric or boolean: #{arg}"
         
     | 
| 
      
 63 
     | 
    
         
            +
                  when Arrow::BooleanArray
         
     | 
| 
      
 64 
     | 
    
         
            +
                    return filter_by_array(arg)
         
     | 
| 
      
 65 
     | 
    
         
            +
                  when Arrow::Array
         
     | 
| 
      
 66 
     | 
    
         
            +
                    array = arg
         
     | 
| 
      
 67 
     | 
    
         
            +
                  else
         
     | 
| 
      
 68 
     | 
    
         
            +
                    unless arg.is_a?(Numeric) || booleans?([arg])
         
     | 
| 
      
 69 
     | 
    
         
            +
                      raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
         
     | 
| 
      
 70 
     | 
    
         
            +
                    end
         
     | 
| 
      
 71 
     | 
    
         
            +
                  end
         
     | 
| 
      
 72 
     | 
    
         
            +
                  array ||= Arrow::Array.new(args)
         
     | 
| 
      
 73 
     | 
    
         
            +
                  return filter_by_array(array) if array.is_a?(Arrow::BooleanArray)
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
      
 75 
     | 
    
         
            +
                  vector = Vector.new(array)
         
     | 
| 
      
 76 
     | 
    
         
            +
                  return take_by_vector(vector) if vector.numeric?
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
      
 78 
     | 
    
         
            +
                  raise VectorArgumentError, "Invalid argument: #{args}"
         
     | 
| 
      
 79 
     | 
    
         
            +
                end
         
     | 
| 
      
 80 
     | 
    
         
            +
             
     | 
| 
      
 81 
     | 
    
         
            +
                #   @param values [Array, Arrow::Array, Vector]
         
     | 
| 
      
 82 
     | 
    
         
            +
                def is_in(*values)
         
     | 
| 
      
 83 
     | 
    
         
            +
                  values.flatten!
         
     | 
| 
      
 84 
     | 
    
         
            +
                  array =
         
     | 
| 
      
 85 
     | 
    
         
            +
                    case values[0]
         
     | 
| 
      
 86 
     | 
    
         
            +
                    when Vector
         
     | 
| 
      
 87 
     | 
    
         
            +
                      values[0].data
         
     | 
| 
      
 88 
     | 
    
         
            +
                    when Arrow::Array
         
     | 
| 
      
 89 
     | 
    
         
            +
                      values[0]
         
     | 
| 
      
 90 
     | 
    
         
            +
                    end
         
     | 
| 
      
 91 
     | 
    
         
            +
                  array ||= data.class.new(values)
         
     | 
| 
      
 92 
     | 
    
         
            +
                  Vector.new(data.is_in(array))
         
     | 
| 
      
 93 
     | 
    
         
            +
                end
         
     | 
| 
      
 94 
     | 
    
         
            +
             
     | 
| 
      
 95 
     | 
    
         
            +
                # Arrow's support required
         
     | 
| 
      
 96 
     | 
    
         
            +
                def index(element)
         
     | 
| 
      
 97 
     | 
    
         
            +
                  to_a.index(element)
         
     | 
| 
      
 98 
     | 
    
         
            +
                end
         
     | 
| 
      
 99 
     | 
    
         
            +
             
     | 
| 
      
 100 
     | 
    
         
            +
                private
         
     | 
| 
      
 101 
     | 
    
         
            +
             
     | 
| 
      
 102 
     | 
    
         
            +
                # Accepts indices by numeric Vector
         
     | 
| 
      
 103 
     | 
    
         
            +
                # Runs the :array_take function on self's data with the given positions.
                # Negative positions are shifted by size, so they count from the tail.
                #
                # @param indices [Vector] numeric Vector of positions
                # @return [Vector] a new Vector wrapping the value of the take result
                # @raise [VectorTypeError] when indices is not numeric
                # @raise [VectorArgumentError] when a position lies outside of self
                def take_by_vector(indices)
                  unless indices.numeric?
                    raise VectorTypeError, "Indices must be numeric Vector: #{indices}"
                  end

                  lowest = indices.min
                  raise VectorArgumentError, "Index out of range: #{lowest}" if lowest <= -size - 1

                  # Rewrite negative positions so that every position counts from the head.
                  from_head = (indices < 0).if_else(indices + size, indices)
                  highest = from_head.max
                  raise VectorArgumentError, "Index out of range: #{highest}" if highest >= size

                  # Rebuild the positions as an Arrow UInt64 array (round to integer array).
                  positions = Arrow::UInt64ArrayBuilder.build(from_head.data)

                  Vector.new(find(:array_take).execute([data, positions]).value)
                end
         
     | 
| 
      
 115 
     | 
    
         
            +
             
     | 
| 
      
 116 
     | 
    
         
            +
                # Accepts booleans by Arrow::BooleanArray
         
     | 
| 
      
 117 
     | 
    
         
            +
                # Runs the :array_filter function on self's data with +boolean_array+ as the mask.
                #
                # @param boolean_array [Arrow::BooleanArray] mask; its length must equal size
                # @return [Vector] a new Vector wrapping the value of the filter result
                # @raise [VectorArgumentError] when the mask length differs from size
                def filter_by_array(boolean_array)
                  if boolean_array.length != size
                    raise VectorArgumentError, 'Booleans must be same size as self.'
                  end

                  filtered = find(:array_filter).execute([data, boolean_array])
                  Vector.new(filtered.value)
                end
         
     | 
| 
      
 123 
     | 
    
         
            +
              end
         
     | 
| 
      
 124 
     | 
    
         
            +
            end
         
     |