RubyGems - rover-df - Versions diffs - 0.2.8 → 0.3.2 - Mend

rover-df 0.2.8 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 65d2fda186484e920421543e2f0203635054ccb8a23250bd3fc6a9d8c328725f
-  data.tar.gz: e4cd1e6d69e1e4f340f6692111476a5be9405f348841cfba6f6c431f04d85347
+  metadata.gz: 44146e3081c968813848026d2a7f785527a0bb55af0a1978a8087d0dcc7a568a
+  data.tar.gz: db64041059937e131d27799739506ad27a78d19be3bdd90d299bcee855b54755
 SHA512:
-  metadata.gz: c720f3bc45178f938c20546ac1b7279ae047affafce5e06cff4f703e1d8ff7a99c1bca94a3f40cb7d26945d770bf136a2adc3477cf6ffc3cdaad9a15aa6090a1
-  data.tar.gz: c44135cc0e70b08b72e1084565ef3479bcb92000bf34662b76a25933e68ad33a584afae071ddebfd5724ad61fe7e7dbc283241d7194c532dd70f36b1358b266d
+  metadata.gz: f66190d43258016bc54da2ee42078087784e2c245095fceced4f617b4343130c8e324fd07a4fb0a08b6e23f512268d517e330087959dd7a78187228383189ea8
+  data.tar.gz: a1d3a80ff866d72dc32a0067240c32f7a24c602a736d932dba394af8b760566591ff0d2942a61243230016182a3a5fad2e635fa1095f809caf477146d0810868

data/CHANGELOG.md CHANGED

@@ -1,3 +1,25 @@
+## 0.3.2 (2022-07-10)
+- Added `sqrt` method to vectors
+- Improved numeric operations between scalars and vectors
+- Improved performance of `tally`
+## 0.3.1 (2022-05-18)
+- Added `to!` to vectors
+- Fixed error with `nil` and `:float64` type
+- Fixed `:header_converters` option with `read_csv` and `parse_csv`
+## 0.3.0 (2022-04-03)
+- Added `deep_dup` method to data frames
+- Changed `:int` to `:int64`, `:uint` to `:uint64`, and `:float` to `:float64` for type methods
+- Changed missing column to raise `KeyError` instead of `ArgumentError` for aggregate methods
+- Changed passing too many headers to `read_csv` and `parse_csv` to raise `ArgumentError`
+- Changed empty string in CSV headers to match behavior of `nil`
+- Fixed `clone` and `dup` method for vectors
+- Dropped support for Ruby < 2.7
 ## 0.2.8 (2022-03-15)
 - Added `group` and `stacked` options to `plot`

data/README.md CHANGED

@@ -424,22 +424,22 @@ df.to_parquet
 You can specify column types when creating a data frame
 ```ruby
-Rover::DataFrame.new(data, types: {"a" => :int, "b" => :float})
+Rover::DataFrame.new(data, types: {"a" => :int64, "b" => :float64})
 ```
 Or
 ```ruby
-Rover.read_csv("data.csv", types: {"a" => :int, "b" => :float})
+Rover.read_csv("data.csv", types: {"a" => :int64, "b" => :float64})
 ```
 Supported types are:
-- boolean - `bool`
-- float - `float`, `float32`
-- integer - `int`, `int32`, `int16`, `int8`
-- unsigned integer - `uint`, `uint32`, `uint16`, `uint8`
-- object - `object`
+- boolean - `:bool`
+- float - `:float64`, `:float32`
+- integer - `:int64`, `:int32`, `:int16`, `:int8`
+- unsigned integer - `:uint64`, `:uint32`, `:uint16`, `:uint8`
+- object - `:object`
 Get column types
@@ -456,7 +456,7 @@ df[:a].type
 Change the type of a column
 ```ruby
-df[:a] = df[:a].to(:int)
+df[:a].to!(:int32)
 ```
 ## History

data/lib/rover/data_frame.rb CHANGED

@@ -72,7 +72,7 @@ module Rover
         # multiple columns
         df = DataFrame.new
         where.each do |k|
-          check_column(k, true)
+          check_column(k)
           df[k] = @vectors[k]
         end
         df
@@ -102,7 +102,7 @@ module Rover
     def []=(k, v)
       check_key(k)
       v = to_vector(v, size: size)
-      raise ArgumentError, "Size mismatch: expected #{size}, got #{v.size}" if @vectors.any? && v.size != size
+      raise ArgumentError, "Size mismatch (given #{v.size}, expected #{size})" if @vectors.any? && v.size != size
       @vectors[k] = v
     end
@@ -242,11 +242,11 @@ module Rover
       types.each do |name, type|
         schema[name] =
           case type
-          when :int
+          when :int64
             :int64
-          when :uint
+          when :uint64
             :uint64
-          when :float
+          when :float64
             :double
           when :float32
             :float
@@ -346,10 +346,10 @@ module Rover
       end
     end
-    def dup
+    def deep_dup
       df = DataFrame.new
       @vectors.each do |k, v|
-        df[k] = v
+        df[k] = v.dup
       end
       df
     end
@@ -503,8 +503,20 @@ module Rover
     private
+    # for clone
+    def initialize_clone(_)
+      @vectors = @vectors.clone
+      super
+    end
+    # for dup
+    def initialize_dup(_)
+      @vectors = @vectors.dup
+      super
+    end
     def check_key(key)
-      raise ArgumentError, "Key must be a string or symbol, got #{key.inspect}" unless key.is_a?(String) || key.is_a?(Symbol)
+      raise ArgumentError, "Key must be a String or Symbol, given #{key.class.name}" unless key.is_a?(String) || key.is_a?(Symbol)
     end
     # TODO make more efficient
@@ -565,19 +577,9 @@ module Rover
       raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
     end
-    # TODO in 0.3.0
-    # always use did_you_mean
-    def check_column(key, did_you_mean = false)
+    def check_column(key)
       unless include?(key)
-        if did_you_mean
-          if RUBY_VERSION.to_f >= 2.6
-            raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
-          else
-            raise KeyError.new("Missing column: #{key}")
-          end
-        else
-          raise ArgumentError, "Missing column: #{key}"
-        end
+        raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
       end
     end

data/lib/rover/group.rb CHANGED

@@ -1,12 +1,11 @@
 module Rover
   class Group
-    # TODO raise ArgumentError for empty columns in 0.3.0
     def initialize(df, columns)
       @df = df
       @columns = columns
+      check_columns
     end
-    # TODO raise ArgumentError for empty columns in 0.3.0
     def group(*columns)
       Group.new(@df, @columns + columns.flatten)
     end
@@ -38,10 +37,6 @@ module Rover
     def grouped_dfs
       # cache here so we can reuse for multiple calcuations if needed
       @grouped_dfs ||= begin
-        raise ArgumentError, "No columns given" if @columns.empty?
-        missing_keys = @columns - @df.keys
-        raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
         groups = Hash.new { |hash, key| hash[key] = [] }
         i = 0
         @df.each_row do |row|
@@ -56,5 +51,12 @@ module Rover
         result
       end
     end
+    def check_columns
+      raise ArgumentError, "No columns given" if @columns.empty?
+      missing_keys = @columns - @df.keys
+      raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
+    end
   end
 end

data/lib/rover/vector.rb CHANGED

@@ -1,23 +1,23 @@
 module Rover
   class Vector
     # if a user never specifies types,
-    # the defaults are bool, float, int, and object
-    # keep these simple
-    #
-    # we could create aliases for float64, int64, uint64
-    # if so, type should still return the simple type
+    # the defaults are bool, float64, int64, and object
     TYPE_CAST_MAPPING = {
       bool: Numo::Bit,
       float32: Numo::SFloat,
-      float: Numo::DFloat,
+      float64: Numo::DFloat,
       int8: Numo::Int8,
       int16: Numo::Int16,
       int32: Numo::Int32,
-      int: Numo::Int64,
+      int64: Numo::Int64,
       object: Numo::RObject,
       uint8: Numo::UInt8,
       uint16: Numo::UInt16,
       uint32: Numo::UInt32,
+      uint64: Numo::UInt64,
+      # legacy - must come last
+      float: Numo::DFloat,
+      int: Numo::Int64,
       uint: Numo::UInt64
     }
@@ -31,7 +31,12 @@ module Rover
     end
     def to(type)
-      Vector.new(self, type: type)
+      dup.to!(type)
+    end
+    def to!(type)
+      @data = cast_data(@data, type: type)
+      self
     end
     def to_numo
@@ -179,13 +184,9 @@ module Rover
       Vector.new(@data.to_a.reject(&block))
     end
+    # use Ruby tally for performance
     def tally
-      result = Hash.new(0)
-      @data.each do |v|
-        result[v] += 1
-      end
-      result.default = nil
-      result
+      @data.to_a.tally
     end
     def sort
@@ -196,6 +197,16 @@ module Rover
       Vector.new(@data.abs)
     end
+    def sqrt
+      data =
+        if @data.is_a?(Numo::SFloat)
+          Numo::SFloat::Math.sqrt(@data)
+        else
+          Numo::DFloat::Math.sqrt(@data)
+        end
+      Vector.new(data)
+    end
     def each(&block)
       @data.each(&block)
     end
@@ -333,6 +344,26 @@ module Rover
     private
+    # for clone
+    def initialize_clone(_)
+      @data = @data.clone
+      super
+    end
+    # for dup
+    def initialize_dup(_)
+      @data = @data.dup
+      super
+    end
+    def coerce(other)
+      if other.is_a?(Numeric)
+        [Vector.new([other]), self]
+      else
+        raise TypeError, "#{self.class} can't be coerced into #{other.class}"
+      end
+    end
     def cast_data(data, type: nil)
       numo_type = numo_type(type) if type
@@ -359,7 +390,7 @@ module Rover
         data = data.to_a
         if type
-          data = data.map { |v| v || Float::NAN } if [:float, :float32].include?(type)
+          data = data.map { |v| v || Float::NAN } if [:float, :float32, :float64].include?(type)
           data = numo_type.cast(data)
         else
           data =

data/lib/rover/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Rover
-  VERSION = "0.2.8"
+  VERSION = "0.3.2"
 end

data/lib/rover.rb CHANGED

@@ -40,12 +40,12 @@ module Rover
       raise ArgumentError, "Must specify headers" if headers == false
-      # TODO use date converter
+      # TODO use date converter in 0.4.0 - need to test performance
       table = yield({converters: :numeric}.merge(csv_options))
       headers = nil if headers == true
-      if headers && table.first && headers.size < table.first.size
-        raise ArgumentError, "Expected #{table.first.size} headers, got #{headers.size}"
+      if headers && table.first && headers.size != table.first.size
+        raise ArgumentError, "Expected #{table.first.size} headers, given #{headers.size}"
       end
       table_headers = (headers || table.shift || []).dup
@@ -55,13 +55,18 @@ module Rover
           table_headers << nil
         end
       end
+      # TODO handle date converters
+      table_headers = table_headers.map! { |v| v.nil? ? nil : v.to_s }
+      if csv_options[:header_converters]
+        table_headers = CSV.parse(CSV.generate_line(table_headers), headers: true, header_converters: csv_options[:header_converters]).headers
+      end
       data = {}
       keys = table_headers.map { |k| [k, true] }.to_h
       unnamed_suffix = 1
       table_headers.each_with_index do |k, i|
-        # TODO do same for empty string in 0.3.0
-        if k.nil?
+        if k.nil? || k.empty?
           k = "unnamed"
           while keys.include?(k)
             unnamed_suffix += 1

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rover-df
 version: !ruby/object:Gem::Version
-  version: 0.2.8
+  version: 0.3.2
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-03-15 00:00:00.000000000 Z
+date: 2022-07-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -51,7 +51,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '2.4'
+      version: '2.7'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="