RubyGems - red_amber - Versions diffs - 0.4.2 → 0.5.1 - Mend

red_amber 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

checksums.yaml +4 -4
data/.devcontainer/Dockerfile +75 -0
data/.devcontainer/devcontainer.json +38 -0
data/.devcontainer/onCreateCommand.sh +22 -0
data/.rubocop.yml +11 -5
data/CHANGELOG.md +141 -17
data/Gemfile +5 -6
data/README.ja.md +271 -0
data/README.md +52 -31
data/Rakefile +55 -0
data/benchmark/group.yml +12 -5
data/doc/Dev_Containers.ja.md +290 -0
data/doc/Dev_Containers.md +292 -0
data/doc/qmd/examples_of_red_amber.qmd +4596 -0
data/doc/qmd/red-amber.qmd +90 -0
data/docker/Dockerfile +2 -2
data/docker/Gemfile +8 -3
data/docker/docker-compose.yml +1 -1
data/docker/readme.md +5 -5
data/lib/red_amber/data_frame.rb +78 -4
data/lib/red_amber/data_frame_combinable.rb +147 -119
data/lib/red_amber/data_frame_displayable.rb +7 -6
data/lib/red_amber/data_frame_loadsave.rb +1 -1
data/lib/red_amber/data_frame_selectable.rb +51 -2
data/lib/red_amber/data_frame_variable_operation.rb +6 -6
data/lib/red_amber/group.rb +476 -127
data/lib/red_amber/helper.rb +26 -0
data/lib/red_amber/subframes.rb +18 -11
data/lib/red_amber/vector.rb +45 -25
data/lib/red_amber/vector_aggregation.rb +26 -0
data/lib/red_amber/vector_selectable.rb +124 -40
data/lib/red_amber/vector_string_function.rb +279 -0
data/lib/red_amber/vector_unary_element_wise.rb +4 -0
data/lib/red_amber/vector_updatable.rb +28 -0
data/lib/red_amber/version.rb +1 -1
data/lib/red_amber.rb +2 -1
data/red_amber.gemspec +3 -3
metadata +19 -14
data/docker/Gemfile.lock +0 -80
data/docker/example +0 -74
data/docker/notebook/examples_of_red_amber.ipynb +0 -8562
data/docker/notebook/red-amber.ipynb +0 -188

data/lib/red_amber/group.rb CHANGED Viewed

@@ -4,6 +4,7 @@ module RedAmber
   # Group class
   class Group
     include Enumerable # This feature is experimental
+    include Helper
     using RefineArrowTable
@@ -25,12 +26,7 @@ module RedAmber
       private
       # @!macro [attach] define_group_aggregation
-      #   @!method $1(*summary_keys)
-      #     Group aggregation function `$1`.
-      #     @param summary_keys [Array<Symbol, String>]
-      #       summary keys.
-      #     @return [DataFrame]
-      #       aggregated DataFrame
+      #   Returns aggregated DataFrame.
       #
       def define_group_aggregation(function)
         define_method(function) do |*summary_keys|
@@ -54,7 +50,7 @@ module RedAmber
     # @param group_keys [Array<Symbol, String>]
     #   keys for grouping.
     # @return [Group]
-    #   Group object.
+    #   Group object. It inspects grouped columns and its count.
     # @example
     #   Group.new(penguins, :species)
     #
@@ -78,13 +74,93 @@ module RedAmber
       @group = @dataframe.table.group(*@group_keys)
     end
-    define_group_aggregation(:count)
+    # @!macro group_aggregation
+    #   @param group_keys [Array<Symbol, String>]
+    #     keys of the columns to aggregate (not the grouping keys).
+    #   @return [DataFrame]
+    #     aggregated DataFrame
+    # Whether all elements in each group evaluate to true.
+    #
+    # @!method all(*group_keys)
+    #   @macro group_aggregation
+    #   @example For boolean columns by default.
+    #     dataframe
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000230dc>
+    #             x y        z
+    #       <uint8> <string> <boolean>
+    #     0       1 A        false
+    #     1       2 A        true
+    #     2       3 B        false
+    #     3       4 B        (nil)
+    #     4       5 B        true
+    #     5       6 C        false
+    #
+    #     dataframe.group(:y).all
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000fc08>
+    #       y        all(z)
+    #       <string> <boolean>
+    #     0 A        false
+    #     1 B        false
+    #     2 C        false
+    #
+    define_group_aggregation :all
+    # Whether any elements in each group evaluate to true.
+    #
+    # @!method any(*group_keys)
+    #   @macro group_aggregation
+    #   @example For boolean columns by default.
+    #     dataframe.group(:y).any
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000117ec>
+    #       y        any(z)
+    #       <string> <boolean>
+    #     0 A        true
+    #     1 B        true
+    #     2 C        false
+    #
+    define_group_aggregation :any
+    # Count the number of non-nil values in each group.
+    #   If counts are the same (and do not include NaN or nil),
+    #   columns for counts are unified.
+    #
+    # @!method count(*group_keys)
+    # @macro group_aggregation
+    # @example Show counts for each group.
+    #   dataframe.group(:y).count
+    #
+    #   # =>
+    #   #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000011ea04>
+    #     y        count(x) count(z)
+    #     <string>  <int64>  <int64>
+    #   0 A               2        2
+    #   1 B               3        2
+    #   2 C               1        1
+    #
+    #   dataframe.group(:z).count
+    #   # same as dataframe.group(:z).count(:x, :y)
+    #
+    #   # =>
+    #   #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000122834>
+    #     z           count
+    #     <boolean> <int64>
+    #   0 false           3
+    #   1 true            2
+    #   2 (nil)           1
+    #
+    define_group_aggregation :count
     alias_method :__count, :count
     private :__count
-    def count(*summary_keys)
-      df = __count(summary_keys)
-      # if counts are the same (and do not include NaN or nil), aggregate count columns.
+    def count(*group_keys)
+      df = __count(group_keys)
       if df.pick(@group_keys.size..).to_h.values.uniq.size == 1
         df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
       else
@@ -92,19 +168,213 @@ module RedAmber
       end
     end
-    define_group_aggregation(:sum)
+    # Returns each record group size as a DataFrame.
+    #
+    # @return [DataFrame]
+    #   DataFrame consists of:
+    #   - Group key columns.
+    #   - Result columns by group aggregation.
+    # @example
+    #   penguins.group(:species).group_count
+    #
+    #   # =>
+    #   #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003a70>
+    #     species   group_count
+    #     <string>      <uint8>
+    #   0 Adelie            152
+    #   1 Chinstrap          68
+    #   2 Gentoo            124
+    #
+    def group_count
+      DataFrame.create(group_table)
+    end
+    alias_method :count_all, :group_count
-    define_group_aggregation(:product)
+    # Count the unique values in each group.
+    #
+    # @!method count_uniq(*group_keys)
+    # @macro group_aggregation
+    # @example Show counts for each group.
+    #   dataframe.group(:y).count_uniq
+    #
+    #   # =>
+    #   #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000011ea04>
+    #     y        count_uniq(x)
+    #     <string>       <int64>
+    #   0 A                    2
+    #   1 B                    3
+    #   2 C                    1
+    #
+    define_group_aggregation :count_distinct
+    def count_uniq(*group_keys)
+      df = count_distinct(*group_keys)
+      df.rename do
+        keys_org = keys.select { _1.start_with?('count_distinct') }
+        keys_renamed = keys_org.map { _1.to_s.gsub('distinct', 'uniq') }
+        keys_org.zip keys_renamed
+      end
+    end
-    define_group_aggregation(:mean)
+    # Compute maximum of values in each group for numeric columns.
+    #
+    # @!method max(*group_keys)
+    #   @macro group_aggregation
+    #   @example
+    #     dataframe.group(:y).max
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000014ae74>
+    #       y         max(x)
+    #       <string> <uint8>
+    #     0 A              2
+    #     1 B              5
+    #     2 C              6
+    #
+    define_group_aggregation :max
-    define_group_aggregation(:min)
+    # Compute mean of values in each group for numeric columns.
+    #
+    # @!method mean(*group_keys)
+    #   @macro group_aggregation
+    #   @example
+    #     dataframe.group(:y).mean
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000138a8>
+    #       y         mean(x)
+    #       <string> <double>
+    #     0 A             1.5
+    #     1 B             4.0
+    #     2 C             6.0
+    #
+    define_group_aggregation :mean
-    define_group_aggregation(:max)
+    # Compute median of values in each group for numeric columns.
+    #
+    # @!method median(*group_keys)
+    #   @macro group_aggregation
+    #   @example
+    #     dataframe.group(:y).median
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000138a8>
+    #       y        median(x)
+    #       <string>  <double>
+    #     0 A              1.5
+    #     1 B              4.0
+    #     2 C              6.0
+    #
+    define_group_aggregation :approximate_median
+    def median(*group_keys)
+      df = approximate_median(*group_keys)
+      df.rename do
+        keys_org = keys.select { _1.start_with?('approximate_') }
+        keys_renamed = keys_org.map { _1.to_s.delete_prefix('approximate_') }
+        keys_org.zip keys_renamed
+      end
+    end
-    define_group_aggregation(:stddev)
+    # Compute minimum of values in each group for numeric columns.
+    #
+    # @!method min(*group_keys)
+    #   @macro group_aggregation
+    #   @example
+    #     dataframe.group(:y).min
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000018f38>
+    #       y         min(x)
+    #       <string> <uint8>
+    #     0 A              1
+    #     1 B              3
+    #     2 C              6
+    #
+    define_group_aggregation :min
-    define_group_aggregation(:variance)
+    # Get one value from each group.
+    #
+    # @!method one(*group_keys)
+    #   @macro group_aggregation
+    #   @example
+    #     dataframe.group(:y).one
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000002885c>
+    #       y         one(x)
+    #       <string> <uint8>
+    #     0 A              1
+    #     1 B              3
+    #     2 C              6
+    #
+    define_group_aggregation :one
+    # Compute product of values in each group for numeric columns.
+    #
+    # @!method product(*group_keys)
+    #   @macro group_aggregation
+    #   @example
+    #     dataframe.group(:y).product
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000021a84>
+    #       y        product(x)
+    #       <string>   <uint64>
+    #     0 A                 2
+    #     1 B                60
+    #     2 C                 6
+    #
+    define_group_aggregation :product
+    # Compute standard deviation of values in each group for numeric columns.
+    #
+    # @!method stddev(*group_keys)
+    #   @macro group_aggregation
+    #   @example
+    #     dataframe.group(:y).stddev
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000002be6c>
+    #       y        stddev(x)
+    #       <string>  <double>
+    #     0 A              0.5
+    #     1 B            0.816
+    #     2 C              0.0
+    #
+    define_group_aggregation :stddev
+    # Compute sum of values in each group for numeric columns.
+    #
+    # @!method sum(*group_keys)
+    #   @macro group_aggregation
+    #   @example
+    #     dataframe.group(:y).sum
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000032a14>
+    #       y          sum(x)
+    #       <string> <uint64>
+    #     0 A               3
+    #     1 B              12
+    #     2 C               6
+    #
+    define_group_aggregation :sum
+    # Compute variance of values in each group for numeric columns.
+    #
+    # @!method variance(*group_keys)
+    #   @macro group_aggregation
+    #   @example
+    #     dataframe.group(:y).variance
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000003b1dc>
+    #       y        variance(x)
+    #       <string>    <double>
+    #     0 A               0.25
+    #     1 B              0.667
+    #     2 C                0.0
+    #
+    define_group_aggregation :variance
     # Returns Array of boolean filters to select each records in the Group.
     #
@@ -114,15 +384,27 @@ module RedAmber
     #
     def filters
       @filters ||= begin
-        first, *others = @group_keys.map do |key|
-          vector = @dataframe[key]
-          vector.uniq.each.map { |u| u.nil? ? vector.is_nil : vector == u }
-        end
-        if others.empty?
-          first.select(&:any?)
-        else
-          first.product(*others).map { |a| a.reduce(&:&) }.select(&:any?)
+        group_values = group_table[group_keys].each_record.map(&:to_a)
+        Enumerator.new(group_table.n_rows) do |yielder|
+          group_values.each do |values|
+            booleans =
+              values.map.with_index do |value, i|
+                column = @dataframe[group_keys[i]].data
+                if value.nil?
+                  Arrow::Function.find('is_null').execute([column])
+                elsif value.is_a?(Float) && value.nan?
+                  Arrow::Function.find('is_nan').execute([column])
+                else
+                  Arrow::Function.find('equal').execute([column, value])
+                end
+              end
+            filter =
+              booleans.reduce do |result, datum|
+                Arrow::Function.find('and_kleene').execute([result, datum])
+              end
+            yielder << Vector.create(filter.value)
+          end
         end
       end
     end
@@ -147,119 +429,174 @@ module RedAmber
     #     group size.
     #
     def each
-      filters
       return enum_for(:each) unless block_given?
-      @filters.each do |filter|
-        yield @dataframe[filter]
+      filters.each do |filter|
+        yield @dataframe.filter(filter)
       end
       @filters.size
     end
-    # Returns each record group size as a DataFrame.
+    # String representation of self.
     #
-    # @return [DataFrame]
-    #   DataFrame consists of:
-    #   - Group key columns.
-    #   - Result columns by group aggregation.
+    # @return [String]
+    #   show information of self as a String.
     # @example
-    #   penguins.group(:species).group_count
+    #   puts penguins.group(:species).inspect
     #
     #   # =>
-    #   #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003a70>
+    #   #<RedAmber::Group : 0x0000000000003a98>
     #     species   group_count
     #     <string>      <uint8>
     #   0 Adelie            152
     #   1 Chinstrap          68
     #   2 Gentoo            124
     #
-    def group_count
-      DataFrame.create(add_columns_to_table(base_table, [:group_count], [group_counts]))
+    def inspect
+      "#<#{self.class} : #{format('0x%016x', object_id)}>\n#{group_count}"
     end
-    # String representation of self.
+    # Summarize Group by aggregation functions from the block.
     #
-    # @return [String]
-    #   show information of self as a String.
-    # @example
-    #   puts penguins.group(:species).inspect
+    # @overload summarize
+    #   Summarize by a function.
+    #   @yieldparam group [Group]
+    #     passes group object self.
+    #   @yieldreturn [DataFrame]
+    #     an aggregated DataFrame.
+    #   @return [DataFrame]
+    #     summarized DataFrame.
+    #   @example Single function and single variable
+    #     group = penguins.group(:species)
+    #     group
     #
-    #   # =>
-    #   #<RedAmber::Group : 0x0000000000003a98>
-    #     species     count
-    #     <string>  <uint8>
-    #   0 Adelie        152
-    #   1 Chinstrap      68
-    #   2 Gentoo        124
+    #     # =>
+    #     #<RedAmber::Group : 0x000000000000c314>
+    #       species   group_count
+    #       <string>      <uint8>
+    #     0 Adelie            152
+    #     1 Chinstrap          68
+    #     2 Gentoo            124
     #
-    def inspect
-      "#<#{self.class} : #{format('0x%016x', object_id)}>\n#{count(@group_keys)}"
-    end
-    # Summarize Group by aggregation functions from the block.
+    #     group.summarize { mean(:bill_length_mm) }
     #
-    # @yieldparam group [Group]
-    #   passes group object self.
-    # @yieldreturn [DataFrame, Array<DataFrame>]
-    #   an aggregated DataFrame or an array of aggregated DataFrames.
-    # @return [DataFrame]
-    #   summarized DataFrame.
-    # @example Single function and single variable
-    #   group = penguins.group(:species)
-    #   group
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c364>
+    #       species   mean(bill_length_mm)
+    #       <string>              <double>
+    #     0 Adelie                   38.79
+    #     1 Chinstrap                48.83
+    #     2 Gentoo                    47.5
     #
-    #   # =>
-    #   #<RedAmber::Group : 0x000000000000c314>
-    #     species     count
-    #     <string>  <uint8>
-    #   0 Adelie        152
-    #   1 Chinstrap      68
-    #   2 Gentoo        124
+    #   @example Single function only
+    #     group.summarize { mean }
     #
-    #   group.summarize { mean(:bill_length_mm) }
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 6 Vectors, 0x000000000000c350>
+    #       species   mean(bill_length_mm) mean(bill_depth_mm) ... mean(year)
+    #       <string>              <double>            <double> ...   <double>
+    #     0 Adelie                   38.79               18.35 ...    2008.01
+    #     1 Chinstrap                48.83               18.42 ...    2007.97
+    #     2 Gentoo                    47.5               14.98 ...    2008.08
     #
-    #   # =>
-    #   #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c364>
-    #     species   mean(bill_length_mm)
-    #     <string>              <double>
-    #   0 Adelie                   38.79
-    #   1 Chinstrap                48.83
-    #   2 Gentoo                    47.5
+    # @overload summarize
+    #   Summarize by multiple functions.
     #
-    # @example Single function only
-    #   group.summarize { mean }
+    #   @yieldparam group [Group]
+    #     passes group object self.
+    #   @yieldreturn [Array<DataFrame>]
+    #     an array of aggregated DataFrames.
+    #   @return [DataFrame]
+    #     summarized DataFrame.
+    #   @example Multiple functions
+    #     group.summarize { [min(:bill_length_mm), max(:bill_length_mm)] }
     #
-    #   # =>
-    #   #<RedAmber::DataFrame : 3 x 6 Vectors, 0x000000000000c350>
-    #     species   mean(bill_length_mm) mean(bill_depth_mm) ... mean(year)
-    #     <string>              <double>            <double> ...   <double>
-    #   0 Adelie                   38.79               18.35 ...    2008.01
-    #   1 Chinstrap                48.83               18.42 ...    2007.97
-    #   2 Gentoo                    47.5               14.98 ...    2008.08
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
+    #       species   min(bill_length_mm) max(bill_length_mm)
+    #       <string>             <double>            <double>
+    #     0 Adelie                   32.1                46.0
+    #     1 Chinstrap                40.9                58.0
+    #     2 Gentoo                   40.9                59.6
     #
-    # @example Multiple functions
-    #   group.summarize { [min(:bill_length_mm), max(:bill_length_mm)] }
+    # @overload summarize
+    #   Summarize by functions and rename the result columns by a Hash.
     #
-    #   # =>
-    #   #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
-    #     species   min(bill_length_mm) max(bill_length_mm)
-    #     <string>             <double>            <double>
-    #   0 Adelie                   32.1                46.0
-    #   1 Chinstrap                40.9                58.0
-    #   2 Gentoo                   40.9                59.6
-    #
-    def summarize(&block)
-      agg = instance_eval(&block)
+    #   @yieldparam group [Group]
+    #     passes group object self.
+    #   @yieldreturn [Hash{Symbol, String => DataFrame}]
+    #     a Hash of new column name to aggregated DataFrame.
+    #     The DataFrame must return only one aggregated column.
+    #   @return [DataFrame]
+    #     summarized DataFrame.
+    #   @example Rename column name by Hash
+    #     group.summarize {
+    #       {
+    #         min_bill_length_mm: min(:bill_length_mm),
+    #         max_bill_length_mm: max(:bill_length_mm),
+    #       }
+    #     }
+    #
+    #     # =>
+    #     #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
+    #       species   min_bill_length_mm max_bill_length_mm
+    #       <string>            <double>           <double>
+    #     0 Adelie                  32.1               46.0
+    #     1 Chinstrap               40.9               58.0
+    #     2 Gentoo                  40.9               59.6
+    #
+    def summarize(*args, &block)
+      if block
+        agg = instance_eval(&block)
+        unless args.empty?
+          agg = [agg] if agg.is_a?(DataFrame)
+          agg = args.zip(agg).to_h
+        end
+      else
+        agg = args
+      end
       case agg
       when DataFrame
         agg
       when Array
-        agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
+        aggregations =
+          agg.map do |df|
+            v = df.vectors[-1]
+            [v.key, v]
+          end
+        agg[0].assign(aggregations)
+      when Hash
+        aggregations =
+          agg.map do |key, df|
+            aggregated_keys = df.keys - @group_keys
+            if aggregated_keys.size > 1
+              message =
+                "accept only one column from the Hash: #{aggregated_keys.join(', ')}"
+              raise GroupArgumentError, message
+            end
+            v = df.vectors[-1]
+            [key, v]
+          end
+        agg.values[-1].drop(-1).assign(aggregations)
       else
         raise GroupArgumentError, "Unknown argument: #{agg}"
       end
     end
+    # Return grouped DataFrame only for group keys.
+    #
+    # @return [DataFrame]
+    #   grouped DataFrame projected only for group_keys.
+    # @since 0.5.0
+    #
+    def grouped_frame
+      DataFrame.create(group_table[group_keys])
+    end
+    alias_method :none, :grouped_frame
     # Aggregating summary.
     #
     # @api private
@@ -270,37 +607,49 @@ module RedAmber
     private
-    def build_aggregation_keys(function_name, summary_keys)
-      if summary_keys.empty?
-        [function_name]
-      else
-        summary_keys.map { |key| "#{function_name}(#{key})" }
-      end
-    end
-    # @note `@group_counts.sum == @dataframe.size``
-    def group_counts
-      @group_counts ||= filters.map(&:sum)
+    def group_table
+      @group_table ||= build_aggregated_table
     end
-    def base_table
-      @base_table ||= begin
-        indexes = filters.map { |filter| filter.index(true) }
-        @dataframe.table[@group_keys].take(indexes)
+    def build_aggregated_table
+      keys = @group_keys
+      key = keys[0]
+      table = @dataframe.table
+      plan = Arrow::ExecutePlan.new
+      source_node = plan.build_source_node(table)
+      aggregate_node =
+        plan.build_aggregate_node(source_node, {
+                                    aggregations: [{ function: 'hash_count',
+                                                     input: key }], keys: keys
+                                  })
+      expressions = keys.map { |k| Arrow::FieldExpression.new(k) }
+      null_count = Arrow::Function.find('is_null').execute([table[key]]).value.sum
+      count_field = Arrow::FieldExpression.new("count(#{key})")
+      if null_count.zero?
+        expressions << count_field
+      else
+        is_zero =
+          Arrow::CallExpression.new('equal', [count_field, Arrow::Int64Scalar.new(0)])
+        null_count_scalar = Arrow::Int64Scalar.new(null_count)
+        expressions <<
+          Arrow::CallExpression.new('if_else', [
+                                      is_zero, null_count_scalar, count_field
+                                    ])
       end
-    end
+      options = Arrow::ProjectNodeOptions.new(expressions, keys + [:group_count])
+      project_node = plan.build_project_node(aggregate_node, options)
-    def add_columns_to_table(table, keys, data_arrays)
-      fields = table.schema.fields
-      arrays = table.columns.map(&:data)
+      sink_and_start_plan(plan, project_node)
+    end
-      keys.zip(data_arrays).each do |key, array|
-        data = Arrow::ChunkedArray.new([array])
-        fields << Arrow::Field.new(key, data.value_data_type)
-        arrays << data
+    def build_aggregation_keys(function_name, summary_keys)
+      if summary_keys.empty?
+        [function_name]
+      else
+        summary_keys.map { |key| "#{function_name}(#{key})" }
       end
-      Arrow::Table.new(Arrow::Schema.new(fields), arrays)
     end
     # Call Vector aggregating function and return an array of arrays: