RubyGems - rover-df - Versions diffs - 0.2.8 → 0.3.2 - Mend

rover-df 0.2.8 → 0.3.2

Files changed (9) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 65d2fda186484e920421543e2f0203635054ccb8a23250bd3fc6a9d8c328725f
-  data.tar.gz: e4cd1e6d69e1e4f340f6692111476a5be9405f348841cfba6f6c431f04d85347
+  metadata.gz: 44146e3081c968813848026d2a7f785527a0bb55af0a1978a8087d0dcc7a568a
+  data.tar.gz: db64041059937e131d27799739506ad27a78d19be3bdd90d299bcee855b54755
 SHA512:
-  metadata.gz: c720f3bc45178f938c20546ac1b7279ae047affafce5e06cff4f703e1d8ff7a99c1bca94a3f40cb7d26945d770bf136a2adc3477cf6ffc3cdaad9a15aa6090a1
-  data.tar.gz: c44135cc0e70b08b72e1084565ef3479bcb92000bf34662b76a25933e68ad33a584afae071ddebfd5724ad61fe7e7dbc283241d7194c532dd70f36b1358b266d
+  metadata.gz: f66190d43258016bc54da2ee42078087784e2c245095fceced4f617b4343130c8e324fd07a4fb0a08b6e23f512268d517e330087959dd7a78187228383189ea8
+  data.tar.gz: a1d3a80ff866d72dc32a0067240c32f7a24c602a736d932dba394af8b760566591ff0d2942a61243230016182a3a5fad2e635fa1095f809caf477146d0810868

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,25 @@
+## 0.3.2 (2022-07-10)
+- Added `sqrt` method to vectors
+- Improved numeric operations between scalars and vectors
+- Improved performance of `tally`
+## 0.3.1 (2022-05-18)
+- Added `to!` to vectors
+- Fixed error with `nil` and `:float64` type
+- Fixed `:header_converters` option with `read_csv` and `parse_csv`
+## 0.3.0 (2022-04-03)
+- Added `deep_dup` method to data frames
+- Changed `:int` to `:int64`, `:uint` to `:uint64`, and `:float` to `:float64` for type methods
+- Changed missing column to raise `KeyError` instead of `ArgumentError` for aggregate methods
+- Changed passing too many headers to `read_csv` and `parse_csv` to raise `ArgumentError`
+- Changed empty string in CSV headers to match behavior of `nil`
+- Fixed `clone` and `dup` method for vectors
+- Dropped support for Ruby < 2.7
 ## 0.2.8 (2022-03-15)
 - Added `group` and `stacked` options to `plot`

data/README.md CHANGED Viewed

@@ -424,22 +424,22 @@ df.to_parquet
 You can specify column types when creating a data frame
 ```ruby
-Rover::DataFrame.new(data, types: {"a" => :int, "b" => :float})
+Rover::DataFrame.new(data, types: {"a" => :int64, "b" => :float64})
 ```
 Or
 ```ruby
-Rover.read_csv("data.csv", types: {"a" => :int, "b" => :float})
+Rover.read_csv("data.csv", types: {"a" => :int64, "b" => :float64})
 ```
 Supported types are:
-- boolean - `bool`
-- float - `float`, `float32`
-- integer - `int`, `int32`, `int16`, `int8`
-- unsigned integer - `uint`, `uint32`, `uint16`, `uint8`
-- object - `object`
+- boolean - `:bool`
+- float - `:float64`, `:float32`
+- integer - `:int64`, `:int32`, `:int16`, `:int8`
+- unsigned integer - `:uint64`, `:uint32`, `:uint16`, `:uint8`
+- object - `:object`
 Get column types
@@ -456,7 +456,7 @@ df[:a].type
 Change the type of a column
 ```ruby
-df[:a] = df[:a].to(:int)
+df[:a].to!(:int32)
 ```
 ## History

data/lib/rover/data_frame.rb CHANGED Viewed

@@ -72,7 +72,7 @@ module Rover
         # multiple columns
         df = DataFrame.new
         where.each do |k|
-          check_column(k, true)
+          check_column(k)
           df[k] = @vectors[k]
         end
         df
@@ -102,7 +102,7 @@ module Rover
     def []=(k, v)
       check_key(k)
       v = to_vector(v, size: size)
-      raise ArgumentError, "Size mismatch: expected #{size}, got #{v.size}" if @vectors.any? && v.size != size
+      raise ArgumentError, "Size mismatch (given #{v.size}, expected #{size})" if @vectors.any? && v.size != size
       @vectors[k] = v
     end
@@ -242,11 +242,11 @@ module Rover
       types.each do |name, type|
         schema[name] =
           case type
-          when :int
+          when :int64
             :int64
-          when :uint
+          when :uint64
             :uint64
-          when :float
+          when :float64
             :double
           when :float32
             :float
@@ -346,10 +346,10 @@ module Rover
       end
     end
-    def dup
+    def deep_dup
       df = DataFrame.new
       @vectors.each do |k, v|
-        df[k] = v
+        df[k] = v.dup
       end
       df
     end
@@ -503,8 +503,20 @@ module Rover
     private
+    # for clone
+    def initialize_clone(_)
+      @vectors = @vectors.clone
+      super
+    end
+    # for dup
+    def initialize_dup(_)
+      @vectors = @vectors.dup
+      super
+    end
     def check_key(key)
-      raise ArgumentError, "Key must be a string or symbol, got #{key.inspect}" unless key.is_a?(String) || key.is_a?(Symbol)
+      raise ArgumentError, "Key must be a String or Symbol, given #{key.class.name}" unless key.is_a?(String) || key.is_a?(Symbol)
     end
     # TODO make more efficient
@@ -565,19 +577,9 @@ module Rover
       raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
     end
-    # TODO in 0.3.0
-    # always use did_you_mean
-    def check_column(key, did_you_mean = false)
+    def check_column(key)
       unless include?(key)
-        if did_you_mean
-          if RUBY_VERSION.to_f >= 2.6
-            raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
-          else
-            raise KeyError.new("Missing column: #{key}")
-          end
-        else
-          raise ArgumentError, "Missing column: #{key}"
-        end
+        raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
       end
     end

data/lib/rover/group.rb CHANGED Viewed

@@ -1,12 +1,11 @@
 module Rover
   class Group
-    # TODO raise ArgumentError for empty columns in 0.3.0
     def initialize(df, columns)
       @df = df
       @columns = columns
+      check_columns
     end
-    # TODO raise ArgumentError for empty columns in 0.3.0
     def group(*columns)
       Group.new(@df, @columns + columns.flatten)
     end
@@ -38,10 +37,6 @@ module Rover
     def grouped_dfs
       # cache here so we can reuse for multiple calculations if needed
       @grouped_dfs ||= begin
-        raise ArgumentError, "No columns given" if @columns.empty?
-        missing_keys = @columns - @df.keys
-        raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
         groups = Hash.new { |hash, key| hash[key] = [] }
         i = 0
         @df.each_row do |row|
@@ -56,5 +51,12 @@ module Rover
         result
       end
     end
+    def check_columns
+      raise ArgumentError, "No columns given" if @columns.empty?
+      missing_keys = @columns - @df.keys
+      raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
+    end
   end
 end

data/lib/rover/vector.rb CHANGED Viewed

@@ -1,23 +1,23 @@
 module Rover
   class Vector
     # if a user never specifies types,
-    # the defaults are bool, float, int, and object
-    # keep these simple
-    #
-    # we could create aliases for float64, int64, uint64
-    # if so, type should still return the simple type
+    # the defaults are bool, float64, int64, and object
     TYPE_CAST_MAPPING = {
       bool: Numo::Bit,
       float32: Numo::SFloat,
-      float: Numo::DFloat,
+      float64: Numo::DFloat,
       int8: Numo::Int8,
       int16: Numo::Int16,
       int32: Numo::Int32,
-      int: Numo::Int64,
+      int64: Numo::Int64,
       object: Numo::RObject,
       uint8: Numo::UInt8,
       uint16: Numo::UInt16,
       uint32: Numo::UInt32,
+      uint64: Numo::UInt64,
+      # legacy - must come last
+      float: Numo::DFloat,
+      int: Numo::Int64,
       uint: Numo::UInt64
     }
@@ -31,7 +31,12 @@ module Rover
     end
     def to(type)
-      Vector.new(self, type: type)
+      dup.to!(type)
+    end
+    def to!(type)
+      @data = cast_data(@data, type: type)
+      self
     end
     def to_numo
@@ -179,13 +184,9 @@ module Rover
       Vector.new(@data.to_a.reject(&block))
     end
+    # use Ruby tally for performance
     def tally
-      result = Hash.new(0)
-      @data.each do |v|
-        result[v] += 1
-      end
-      result.default = nil
-      result
+      @data.to_a.tally
     end
     def sort
@@ -196,6 +197,16 @@ module Rover
       Vector.new(@data.abs)
     end
+    def sqrt
+      data =
+        if @data.is_a?(Numo::SFloat)
+          Numo::SFloat::Math.sqrt(@data)
+        else
+          Numo::DFloat::Math.sqrt(@data)
+        end
+      Vector.new(data)
+    end
     def each(&block)
       @data.each(&block)
     end
@@ -333,6 +344,26 @@ module Rover
     private
+    # for clone
+    def initialize_clone(_)
+      @data = @data.clone
+      super
+    end
+    # for dup
+    def initialize_dup(_)
+      @data = @data.dup
+      super
+    end
+    def coerce(other)
+      if other.is_a?(Numeric)
+        [Vector.new([other]), self]
+      else
+        raise TypeError, "#{self.class} can't be coerced into #{other.class}"
+      end
+    end
     def cast_data(data, type: nil)
       numo_type = numo_type(type) if type
@@ -359,7 +390,7 @@ module Rover
         data = data.to_a
         if type
-          data = data.map { |v| v || Float::NAN } if [:float, :float32].include?(type)
+          data = data.map { |v| v || Float::NAN } if [:float, :float32, :float64].include?(type)
           data = numo_type.cast(data)
         else
           data =

data/lib/rover/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Rover
-  VERSION = "0.2.8"
+  VERSION = "0.3.2"
 end

data/lib/rover.rb CHANGED Viewed

@@ -40,12 +40,12 @@ module Rover
       raise ArgumentError, "Must specify headers" if headers == false
-      # TODO use date converter
+      # TODO use date converter in 0.4.0 - need to test performance
       table = yield({converters: :numeric}.merge(csv_options))
       headers = nil if headers == true
-      if headers && table.first && headers.size < table.first.size
-        raise ArgumentError, "Expected #{table.first.size} headers, got #{headers.size}"
+      if headers && table.first && headers.size != table.first.size
+        raise ArgumentError, "Expected #{table.first.size} headers, given #{headers.size}"
       end
       table_headers = (headers || table.shift || []).dup
@@ -55,13 +55,18 @@ module Rover
           table_headers << nil
         end
       end
+      # TODO handle date converters
+      table_headers = table_headers.map! { |v| v.nil? ? nil : v.to_s }
+      if csv_options[:header_converters]
+        table_headers = CSV.parse(CSV.generate_line(table_headers), headers: true, header_converters: csv_options[:header_converters]).headers
+      end
       data = {}
       keys = table_headers.map { |k| [k, true] }.to_h
       unnamed_suffix = 1
       table_headers.each_with_index do |k, i|
-        # TODO do same for empty string in 0.3.0
-        if k.nil?
+        if k.nil? || k.empty?
           k = "unnamed"
           while keys.include?(k)
             unnamed_suffix += 1

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rover-df
 version: !ruby/object:Gem::Version
-  version: 0.2.8
+  version: 0.3.2
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-03-15 00:00:00.000000000 Z
+date: 2022-07-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -51,7 +51,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '2.4'
+      version: '2.7'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="