RubyGems - red_amber - Versions diffs - 0.2.1 → 0.2.3 - Mend

red_amber 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

checksums.yaml +4 -4
data/.rubocop.yml +15 -0
data/CHANGELOG.md +170 -20
data/Gemfile +4 -2
data/README.md +121 -302
data/benchmark/basic.yml +79 -0
data/benchmark/combine.yml +63 -0
data/benchmark/drop_nil.yml +15 -3
data/benchmark/group.yml +33 -0
data/benchmark/reshape.yml +27 -0
data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
data/benchmark/rover/flights.yml +23 -0
data/benchmark/rover/penguins.yml +23 -0
data/benchmark/rover/planes.yml +23 -0
data/benchmark/rover/weather.yml +23 -0
data/doc/DataFrame.md +611 -318
data/doc/Vector.md +31 -36
data/doc/image/basic_verbs.png +0 -0
data/doc/image/dataframe/assign.png +0 -0
data/doc/image/dataframe/assign_operation.png +0 -0
data/doc/image/dataframe/drop.png +0 -0
data/doc/image/dataframe/join.png +0 -0
data/doc/image/dataframe/pick.png +0 -0
data/doc/image/dataframe/pick_operation.png +0 -0
data/doc/image/dataframe/remove.png +0 -0
data/doc/image/dataframe/rename.png +0 -0
data/doc/image/dataframe/rename_operation.png +0 -0
data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
data/doc/image/dataframe/set_and_bind.png +0 -0
data/doc/image/dataframe/slice.png +0 -0
data/doc/image/dataframe/slice_operation.png +0 -0
data/doc/image/dataframe_model.png +0 -0
data/doc/image/group_operation.png +0 -0
data/doc/image/replace-if_then.png +0 -0
data/doc/image/reshaping_dataframe.png +0 -0
data/doc/image/screenshot.png +0 -0
data/doc/image/vector/binary_element_wise.png +0 -0
data/doc/image/vector/unary_aggregation.png +0 -0
data/doc/image/vector/unary_aggregation_w_option.png +0 -0
data/doc/image/vector/unary_element_wise.png +0 -0
data/lib/red_amber/data_frame.rb +16 -42
data/lib/red_amber/data_frame_combinable.rb +283 -0
data/lib/red_amber/data_frame_displayable.rb +58 -3
data/lib/red_amber/data_frame_loadsave.rb +36 -0
data/lib/red_amber/data_frame_reshaping.rb +8 -6
data/lib/red_amber/data_frame_selectable.rb +9 -9
data/lib/red_amber/data_frame_variable_operation.rb +27 -21
data/lib/red_amber/group.rb +100 -17
data/lib/red_amber/helper.rb +20 -30
data/lib/red_amber/vector.rb +56 -30
data/lib/red_amber/vector_functions.rb +0 -8
data/lib/red_amber/vector_selectable.rb +9 -1
data/lib/red_amber/vector_updatable.rb +61 -63
data/lib/red_amber/version.rb +1 -1
data/lib/red_amber.rb +2 -0
data/red_amber.gemspec +1 -1
metadata +32 -11
data/doc/examples_of_red_amber.ipynb +0 -8979

data/doc/Vector.md CHANGED Viewed

@@ -7,7 +7,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
 ### Create from a column in a DataFrame
   ```ruby
-  df = RedAmber::DataFrame.new(x: [1, 2, 3])
+  df = DataFrame.new(x: [1, 2, 3])
   df[:x]
   # =>
   #<RedAmber::Vector(:uint8, size=3):0x000000000000f4ec>
@@ -17,13 +17,16 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
 ### New from an Array
   ```ruby
-  vector = RedAmber::Vector.new([1, 2, 3])
+  vector = Vector.new([1, 2, 3])
   # or
-  vector = RedAmber::Vector.new(1, 2, 3)
+  vector = Vector.new(1, 2, 3)
   # or
-  vector = RedAmber::Vector.new(1..3)
+  vector = Vector.new(1..3)
   # or
-  vector = RedAmber::Vector.new(Arrow::Array([1, 2, 3])
+  vector = Vector.new(Arrow::Array.new([1, 2, 3]))
+  # or
+  require 'arrow-numo-narray'
+  vector = Vector.new(Numo::Int8[1, 2, 3])
   # =>
   #<RedAmber::Vector(:uint8, size=3):0x000000000000f514>
@@ -61,7 +64,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
 ### `type_class`
-### `each`
+### `each`, `map`, `collect`
   If block is not given, returns Enumerator.
@@ -78,7 +81,7 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
   - `limit` sets size limit to display a long array.
     ```ruby
-    vector = RedAmber::Vector.new((1..50).to_a)
+    vector = Vector.new((1..50).to_a)
     # =>
     #<RedAmber::Vector(:uint8, size=50):0x000000000000f528>
     [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ... ]
@@ -95,8 +98,8 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
 - Negative indices are also accepted, as with Ruby's built-in Array.
 ```ruby
-array = RedAmber::Vector.new(%w[A B C D E])
-indices = RedAmber::Vector.new([0.1, -0.5, -5.1])
+array = Vector.new(%w[A B C D E])
+indices = Vector.new([0.1, -0.5, -5.1])
 array.take(indices)
 # or
 array[indices]
@@ -106,7 +109,7 @@ array[indices]
 ["A", "E", "A"]
 ```
-### `filter(booleans)`, `[](booleans)`
+### `filter(booleans)`, `select(booleans)`, `[](booleans)`
 - Acceptable class for booleans:
   - An array of true, false, or nil
@@ -114,7 +117,7 @@ array[indices]
   - Arrow::BooleanArray
 ```ruby
-array = RedAmber::Vector.new(%w[A B C D E])
+array = Vector.new(%w[A B C D E])
 booleans = [true, false, nil, false, true]
 array.filter(booleans)
 # or
@@ -124,6 +127,7 @@ array[booleans]
 #<RedAmber::Vector(:string, size=2):0x000000000000f21c>
 ["A", "E"]
 ```
+`filter` and `select` also accept a block.
 ## Functions
@@ -158,7 +162,7 @@ Options can be used as follows.
 See the [document of C++ function](https://arrow.apache.org/docs/cpp/compute.html) for detail.
 ```ruby
-double = RedAmber::Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
+double = Vector.new([1, 0/0.0, -1/0.0, 1/0.0, nil, ""])
 #=>
 #<RedAmber::Vector(:double, size=6):0x000000000000f910>
 [1.0, NaN, -Infinity, Infinity, nil, 0.0]
@@ -168,7 +172,7 @@ double.count(mode: :only_valid) #=> 5, default
 double.count(mode: :only_null) #=> 1
 double.count(mode: :all) #=> 6
-boolean = RedAmber::Vector.new([true, true, nil])
+boolean = Vector.new([true, true, nil])
 #=>
 #<RedAmber::Vector(:boolean, size=3):0x000000000000f924>
 [true, true, nil]
@@ -215,7 +219,7 @@ Examples of options for `#round`;
 - `round_mode` Specify rounding mode.
 ```ruby
-double = RedAmber::Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
+double = Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])
 # => [15.15, 2.5, 3.5, -4.5, -5.5]
 double.round
 # => [15.0, 2.0, 4.0, -4.0, -6.0]
@@ -293,7 +297,7 @@ double.round(n_digits: -1)
   array = [0.0/0, Float::NAN]
   array.tally #=> {NaN=>1, NaN=>1}
-  vector = RedAmber::Vector.new(array)
+  vector = Vector.new(array)
   vector.tally #=> {NaN=>2}
   vector.value_counts #=> {NaN=>2}
   ```
@@ -310,7 +314,7 @@ double.round(n_digits: -1)
 ## Coerce
 ```ruby
-vector = RedAmber::Vector.new(1,2,3)
+vector = Vector.new(1,2,3)
 # =>
 #<RedAmber::Vector(:uint8, size=3):0x00000000000decc4>
 [1, 2, 3]
@@ -340,12 +344,13 @@ vector * -1
 - Accepts Scalar, Range  of Integer, Vector, Array, Arrow::Array as a specifier
 - Accepts Scalar, Vector, Array and Arrow::Array as a replacer.
 - Boolean specifiers specify the position of replacer in true.
+  - If booleans.any is false, no replacement happens and self is returned.
 - Index specifiers specify the position of replacer in indices.
 - replacer specifies the values to be replaced.
   - The number of true in booleans must be equal to the length of replacer
 ```ruby
-vector = RedAmber::Vector.new([1, 2, 3])
+vector = Vector.new([1, 2, 3])
 booleans = [true, false, true]
 replacer = [4, 5]
 vector.replace(booleans, replacer)
@@ -379,7 +384,7 @@ vector.replace(booleans, replacer)
 ```ruby
 booleans = [true, false, nil]
 replacer = -1
-vec.replace(booleans, replacer)
+vector.replace(booleans, replacer)
 =>
 #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
 [-1, 2, nil]
@@ -390,17 +395,7 @@ vec.replace(booleans, replacer)
 ```ruby
 booleans = [true, false, true]
 replacer = [nil]
-vec.replace(booleans, replacer)
-=>
-#<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
-[nil, 2, nil]
-```
-- If no replacer specified, it is same as to specify nil.
-```ruby
-booleans = [true, false, true]
-vec.replace(booleans)
+vector.replace(booleans, replacer)
 =>
 #<RedAmber::Vector(:int8, size=3):0x00000000000304d0>
 [nil, 2, nil]
@@ -409,7 +404,7 @@ vec.replace(booleans)
 - An example to replace 'NA' to nil.
 ```ruby
-vector = RedAmber::Vector.new(['A', 'B', 'NA'])
+vector = Vector.new(['A', 'B', 'NA'])
 vector.replace(vector == 'NA', nil)
 # =>
 #<RedAmber::Vector(:string, size=3):0x000000000000f8ac>
@@ -421,7 +416,7 @@ vector.replace(vector == 'NA', nil)
 Specified indices are used 'as sorted'. Position in indices and replacer may not have correspondence.
 ```ruby
-vector = RedAmber::Vector.new([1, 2, 3])
+vector = Vector.new([1, 2, 3])
 indices = [2, 1]
 replacer = [4, 5]
 vector.replace(indices, replacer)
@@ -437,7 +432,7 @@ Propagate the last valid observation forward (or backward).
 Or preserve nil if all previous values are nil or at the end.
 ```ruby
-integer = RedAmber::Vector.new([0, 1, nil, 3, nil])
+integer = Vector.new([0, 1, nil, 3, nil])
 integer.fill_nil_forward
 # =>
 #<RedAmber::Vector(:uint8, size=5):0x000000000000f960>
@@ -459,7 +454,7 @@ Choose values based on self. Self must be a boolean Vector.
 This example will normalize negative indices to positive ones.
 ```ruby
-indices = RedAmber::Vector.new([1, -1, 3, -4])
+indices = Vector.new([1, -1, 3, -4])
 array_size = 10
 normalized_indices = (indices < 0).if_else(indices + array_size, indices)
@@ -474,7 +469,7 @@ For each element in self, return true if it is found in given `values`, false ot
 By default, nulls are matched against the value set. (This will be changed in SetLookupOptions: not implemented.)
 ```ruby
-vector = RedAmber::Vector.new %W[A B C D]
+vector = Vector.new %W[A B C D]
 values = ['A', 'C', 'X']
 vector.is_in(values)
@@ -486,7 +481,7 @@ vector.is_in(values)
 `values` are cast to the same class as the Vector.
 ```ruby
-vector = RedAmber::Vector.new([1, 2, 255])
+vector = Vector.new([1, 2, 255])
 vector.is_in(1, -1)
 # =>
@@ -499,7 +494,7 @@ vector.is_in(1, -1)
 Shift vector's values by specified `amount`. Shifted space is filled by value `fill`.
 ```ruby
-vector = RedAmber::Vector.new([1, 2, 3, 4, 5])
+vector = Vector.new([1, 2, 3, 4, 5])
 vector.shift
 # =>

data/doc/image/basic_verbs.png ADDED Viewed

Binary file

data/doc/image/dataframe/assign.png CHANGED Viewed

Binary file

data/doc/image/dataframe/assign_operation.png ADDED Viewed

Binary file

data/doc/image/dataframe/drop.png CHANGED Viewed

Binary file

data/doc/image/dataframe/join.png ADDED Viewed

Binary file

data/doc/image/dataframe/pick.png CHANGED Viewed

Binary file

data/doc/image/dataframe/pick_operation.png ADDED Viewed

Binary file

data/doc/image/dataframe/remove.png CHANGED Viewed

Binary file

data/doc/image/dataframe/rename.png CHANGED Viewed

Binary file

data/doc/image/dataframe/rename_operation.png ADDED Viewed

Binary file

data/doc/image/dataframe/reshaping_DataFrames.png ADDED Viewed

Binary file

data/doc/image/dataframe/set_and_bind.png ADDED Viewed

Binary file

data/doc/image/dataframe/slice.png CHANGED Viewed

Binary file

data/doc/image/dataframe/slice_operation.png ADDED Viewed

Binary file

data/doc/image/dataframe_model.png CHANGED Viewed

Binary file

data/doc/image/group_operation.png ADDED Viewed

Binary file

data/doc/image/replace-if_then.png ADDED Viewed

Binary file

data/doc/image/reshaping_dataframe.png ADDED Viewed

Binary file

data/doc/image/screenshot.png ADDED Viewed

Binary file

data/doc/image/vector/binary_element_wise.png CHANGED Viewed

Binary file

data/doc/image/vector/unary_aggregation.png CHANGED Viewed

Binary file

data/doc/image/vector/unary_aggregation_w_option.png CHANGED Viewed

Binary file

data/doc/image/vector/unary_element_wise.png CHANGED Viewed

Binary file

data/lib/red_amber/data_frame.rb CHANGED Viewed

@@ -5,8 +5,10 @@ module RedAmber
   # Variable @table holds an Arrow::Table object.
   class DataFrame
     # mix-in
+    include DataFrameCombinable
     include DataFrameDisplayable
     include DataFrameIndexable
+    include DataFrameLoadSave
     include DataFrameReshaping
     include DataFrameSelectable
     include DataFrameVariableOperation
@@ -37,10 +39,15 @@ module RedAmber
         # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
         #   returns empty DataFrame
         @table = Arrow::Table.new({}, [])
+      in [->(x) { x.respond_to?(:to_arrow) } => arrowable]
+        table = arrowable.to_arrow
+        unless table.is_a?(Arrow::Table)
+          raise DataFrameTypeError,
+                "to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
+        end
+        @table = table
       in [Arrow::Table => table]
         @table = table
-      in [DataFrame => dataframe]
-        @table = dataframe.table
       in [rover_or_hash]
         begin
           # Accepts Rover::DataFrame or Hash
@@ -52,10 +59,9 @@ module RedAmber
         @table = Arrow::Table.new(*args)
       end
       name_unnamed_keys
-    end
-    def self.load(path, options = {})
-      DataFrame.new(Arrow::Table.load(path, options))
+      duplicated_keys = keys.tally.select { |_k, v| v > 1 }.keys
+      raise DataFrameArgumentError, "duplicate keys: #{duplicated_keys}" unless duplicated_keys.empty?
     end
     attr_reader :table
@@ -64,18 +70,15 @@ module RedAmber
       @table
     end
-    def save(output, options = {})
-      @table.save(output, options)
-    end
     # Returns the number of rows.
     #
     # @return [Integer] Number of rows.
     def size
       @table.n_rows
     end
-    alias_method :n_rows, :size
+    alias_method :n_records, :size
     alias_method :n_obs, :size
+    alias_method :n_rows, :size
     # Returns the number of columns.
     #
@@ -83,8 +86,9 @@ module RedAmber
     def n_keys
       @table.n_columns
     end
-    alias_method :n_cols, :n_keys
+    alias_method :n_variables, :n_keys
     alias_method :n_vars, :n_keys
+    alias_method :n_cols, :n_keys
     # Returns the numbers of rows and columns.
     #
@@ -171,7 +175,7 @@ module RedAmber
     #   - indices(1) #=> [1, 2, 3, 4, 5]
     #   - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
     def indices(start = 0)
-      (start..).take(size)
+      Vector.new((start..).take(size))
     end
     alias_method :indexes, :indices
@@ -215,17 +219,6 @@ module RedAmber
       Rover::DataFrame.new(to_h)
     end
-    def to_iruby
-      require 'iruby'
-      return ['text/plain', '(empty DataFrame)'] if empty?
-      if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table') == 'TDR'
-        size <= 5 ? ['text/plain', tdr_str(tally: 0)] : ['text/plain', tdr_str]
-      else
-        ['text/html', html_table]
-      end
-    end
     def group(*group_keys, &block)
       g = Group.new(self, group_keys)
       g = g.summarize(&block) if block
@@ -260,25 +253,6 @@ module RedAmber
       ary[%i[variables keys vectors].index(var)]
     end
-    def html_table
-      reduced = size > 8 ? self[0..4, -4..-1] : self
-      converted = reduced.assign do
-        vectors.select.with_object({}) do |vector, assigner|
-          if vector.has_nil?
-            assigner[vector.key] = vector.to_a.map do |e|
-              e = e.nil? ? '<i>(nil)</i>' : e.to_s # nil
-              e = '""' if e.empty? # empty string
-              e.sub(/(\s+)/, '"\1"') # blank spaces
-            end
-          end
-        end
-      end
-      html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
-      "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
-    end
     def name_unnamed_keys
       return unless @table[:'']

data/lib/red_amber/data_frame_combinable.rb ADDED Viewed

@@ -0,0 +1,283 @@
+# frozen_string_literal: true
+module RedAmber
+  # mix-in for the class DataFrame
+  module DataFrameCombinable
+    # Concatenate other dataframe onto the bottom.
+    #
+    # @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
+    #   DataFrame/Table to concatenate onto the bottom of self.
+    # @return [DataFrame]
+    #   Concatenated dataframe.
+    def concatenate(*other)
+      case other
+      in [] | [nil] | [[]]
+        return self
+      in [Array => array]
+        # Nop
+      else
+        array = other
+      end
+      table_array = array.map do |e|
+        case e
+        when Arrow::Table
+          e
+        when DataFrame
+          e.table
+        else
+          raise DataFrameArgumentError, "#{e} is not a Table or a DataFrame"
+        end
+      end
+      DataFrame.new(table.concatenate(table_array))
+    end
+    alias_method :concat, :concatenate
+    alias_method :bind_rows, :concatenate
+    # Merge the columns of other DataFrame(s) or Table(s) into self.
+    # - Self and other must have the same size.
+    # - Self and other must not share any keys.
+    #   - If they share any keys, DataFrameArgumentError is raised.
+    # @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
+    #   DataFrame/Table to merge.
+    # @return [DataFrame]
+    #   Merged dataframe.
+    def merge(*other)
+      case other
+      in [] | [nil] | [[]]
+        return self
+      in [Array => array]
+        # Nop
+      else
+        array = other
+      end
+      hash = array.each_with_object({}) do |e, h|
+        df =
+          case e
+          when Arrow::Table
+            DataFrame.new(e)
+          when DataFrame
+            e
+          else
+            raise DataFrameArgumentError, "#{e} is not a Table or a DataFrame"
+          end
+        raise DataFrameArgumentError, "#{e} do not have same size as self" if size != df.size
+        k = keys.intersection(df.keys).any?
+        raise DataFrameArgumentError, "There are some shared keys: #{k}" if k
+        h.merge!(df.to_h)
+      end
+      assign(hash)
+    end
+    alias_method :bind_cols, :merge
+    # Mutating joins
+    # Join data, leaving only the matching records.
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
+    # @param join_keys [String, Symbol, ::Array<String, Symbol>] Keys to match.
+    # @return [DataFrame] Joined dataframe.
+    #
+    def inner_join(other, join_keys = nil, suffix: '.1')
+      join(other, join_keys, type: :inner, suffix: suffix)
+    end
+    # Join data, leaving all records.
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
+    # @param join_keys [String, Symbol, ::Array<String, Symbol>] Keys to match.
+    # @return [DataFrame] Joined dataframe.
+    #
+    def full_join(other, join_keys = nil, suffix: '.1')
+      join(other, join_keys, type: :full_outer, suffix: suffix)
+    end
+    alias_method :outer_join, :full_join
+    # Join matching values to self from other.
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
+    # @param join_keys [String, Symbol, ::Array<String, Symbol>] Keys to match.
+    # @return [DataFrame] Joined dataframe.
+    #
+    def left_join(other, join_keys = nil, suffix: '.1')
+      join(other, join_keys, type: :left_outer, suffix: suffix)
+    end
+    # Join matching values from self to other.
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
+    # @param join_keys [String, Symbol, ::Array<String, Symbol>] Keys to match.
+    # @return [DataFrame] Joined dataframe.
+    #
+    def right_join(other, join_keys = nil, suffix: '.1')
+      join(other, join_keys, type: :right_outer, suffix: suffix)
+    end
+    # Filtering joins
+    # Return records of self that have a match in other.
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
+    # @param join_keys [String, Symbol, ::Array<String, Symbol>] Keys to match.
+    # @return [DataFrame] Joined dataframe.
+    #
+    def semi_join(other, join_keys = nil, suffix: '.1')
+      join(other, join_keys, type: :left_semi, suffix: suffix)
+    end
+    # Return records of self that do not have a match in other.
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
+    # @param join_keys [String, Symbol, ::Array<String, Symbol>] Keys to match.
+    # @return [DataFrame] Joined dataframe.
+    #
+    def anti_join(other, join_keys = nil, suffix: '.1')
+      join(other, join_keys, type: :left_anti, suffix: suffix)
+    end
+    # Set operations
+    # Check if set operation with self and other is possible.
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be checked with self.
+    # @return [Boolean] true if set operation is possible.
+    #
+    def set_operable?(other) # rubocop:disable Naming/AccessorMethodName
+      other = DataFrame.new(other) if other.is_a?(Arrow::Table)
+      keys == other.keys
+    end
+    # Select records appearing in both self and other.
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
+    # @return [DataFrame] Joined dataframe.
+    #
+    def intersect(other)
+      other = DataFrame.new(other) if other.is_a?(Arrow::Table)
+      raise DataFrameArgumentError, 'keys are not same with self and other' unless keys == other.keys
+      join(other, keys, type: :inner)
+    end
+    # Select records appearing in self or other.
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
+    # @return [DataFrame] Joined dataframe.
+    #
+    def union(other)
+      other = DataFrame.new(other) if other.is_a?(Arrow::Table)
+      raise DataFrameArgumentError, 'keys are not same with self and other' unless keys == other.keys
+      join(other, keys, type: :full_outer)
+    end
+    # Select records appearing in self but not in other.
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
+    # @return [DataFrame] Joined dataframe.
+    #
+    def difference(other)
+      other = DataFrame.new(other) if other.is_a?(Arrow::Table)
+      raise DataFrameArgumentError, 'keys are not same with self and other' unless keys == other.keys
+      join(other, keys, type: :left_anti)
+    end
+    alias_method :setdiff, :difference
+    # Undocumented. It is preferable to call specific methods.
+    # Join other dataframe
+    #
+    # @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
+    # @param join_keys [String, Symbol, ::Array<String, Symbol>] Keys to match.
+    # @return [DataFrame] Joined dataframe.
+    #
+    #   :type is one of
+    #     :left_semi, :right_semi, :left_anti, :right_anti, :inner, :left_outer, :right_outer, :full_outer.
+    def join(other, join_keys = nil, type: :inner, suffix: '.1', left_outputs: nil, right_outputs: nil)
+      case other
+      when DataFrame
+        # Nop
+      when Arrow::Table
+        other = DataFrame.new(other)
+      else
+        raise DataFrameArgumentError, 'other must be a DataFrame or an Arrow::Table'
+      end
+      # Support natural keys (implicit common keys)
+      natural_keys = keys.intersection(other.keys)
+      raise DataFrameArgumentError, "#{join_keys} are not common keys" if natural_keys.empty?
+      join_keys =
+        if join_keys
+          Array(join_keys).map(&:to_sym)
+        else
+          natural_keys
+        end
+      return self if join_keys.empty?
+      # Support partial join_keys (common key other than join_key will be renamed with suffix)
+      remainer_keys = natural_keys - join_keys
+      unless remainer_keys.empty?
+        renamer = remainer_keys.each_with_object({}) do |key, hash|
+          new_key = nil
+          loop do
+            new_key = "#{key}#{suffix}".to_sym
+            break unless keys.include?(new_key)
+            s = suffix.succ
+            raise DataFrameArgumentError, "suffix #{suffix} is invalid" if s == suffix
+            suffix = s
+          end
+          hash[key] = new_key
+        end
+        other = other.rename(renamer)
+      end
+      # Red Arrow's #join returns duplicated join_keys from self and other as of v9.0.0.
+      # Temporarily merge key vectors here as a workaround.
+      table_output =
+        table.join(other.table, join_keys, type: type, left_outputs: left_outputs, right_outputs: right_outputs)
+      left_indexes = [*0...n_keys]
+      right_indexes = [*((other.keys - join_keys).map { |key| other.keys.index(key) + n_keys })]
+      case type
+      when :left_semi, :left_anti, :right_semi, :right_anti
+        return DataFrame.new(table_output)
+      else
+        selected_indexes = left_indexes.concat(right_indexes)
+      end
+      merged_columns = join_keys.map do |key|
+        i = keys.index(key)
+        merge_column(table_output[i], table_output[n_keys + i], type)
+      end
+      DataFrame.new(table_output[selected_indexes])
+               .assign(*join_keys) { merged_columns }
+    end
+    private
+    def merge_column(column1, column2, type)
+      a1 = column1.to_a
+      a2 = column2.to_a
+      if type == :full_outer
+        a1.zip(a2).map { |x, y| x || y }
+      elsif type.start_with?('right')
+        a2
+      else # :inner or :left-*
+        a1
+      end
+    end
+  end
+end