RubyGems - red-datasets - Versions diffs - 0.0.8 → 0.0.9 - Mend

red-datasets 0.0.8 → 0.0.9

Files changed (13)

checksums.yaml +4 -4
data/doc/text/news.md +33 -0
data/lib/datasets.rb +1 -0
data/lib/datasets/dataset.rb +12 -0
data/lib/datasets/libsvm-dataset-list.rb +194 -54
data/lib/datasets/libsvm.rb +1 -9
data/lib/datasets/mushroom.rb +256 -0
data/lib/datasets/table.rb +83 -3
data/lib/datasets/version.rb +1 -1
data/lib/datasets/wikipedia.rb +2 -10
data/test/test-mushroom.rb +80 -0
data/test/test-table.rb +123 -18
metadata +16 -13

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c7a9199546e7a001c97e45c6fa28db15c0d96b748e527d9705dfee4e4b1db6fd
-  data.tar.gz: c659f6ae1e658ad91210e4427be063463124d89ef90388d34ebfb73ceb49068a
+  metadata.gz: 81ed53e83d75d517052aaf07c66fe177f12f986584141c951ac1dcfa2fc88646
+  data.tar.gz: 94b9f3b8042eaad65304bf7c3d2fc35519f8328b0ca4e9f8a7ad9be13781a91e
 SHA512:
-  metadata.gz: d8a23c4a165a596df22ce5bbe1f8f0cd5c0f002deecafbb26cd5e5f75abb3c0224c1013898162a67787159258d1b801395fc4d949c17939d95940664cffd5600
-  data.tar.gz: f2fd4eb733e6205f138c4005627e815e3787040a8a4b6cce7eca9fd5d4adaa12263e17e8f5bd9394a851e5210f28736ee3c682c81e110da304ae17fb3f0bedba
+  metadata.gz: c73561ed005e4b58f27fc6de969605a22d57adf4bc5b5184e5cdb65739f1ac6b86f6ed67794bfe61164859fc4a1b0f80430bc819b2ea37ac455a560a6f008b13
+  data.tar.gz: 07560b09d68272dc7a959c16ec03975d1fa752f9d6930f0fd746c46e9236995606694f5899bad5bf770812c5a2d81e6f013353f680fc8adf65ad42bae514f57c

data/doc/text/news.md CHANGED

@@ -1,5 +1,38 @@
 # News
+## 0.0.9 - 2019-09-09
+### Improvements
+  * `Datasets::LIBSVMDatasetList`: Improved performance.
+  * `Datasets::Mushroom`: Added.
+    [GitHub#33][Patch by Yasuo Honda]
+  * `Datasets::Table#n_columns`: Added.
+  * `Datasets::Table#n_rows`: Added.
+  * `Datasets::Table#[]`: Added support for index access.
+  * `Datasets::Table#column_names`: Added.
+  * `Datasets::Table#size`: Added.
+  * `Datasets::Table#length`: Added.
+  * `Datasets::Table#each_column`: Added.
+  * `Datasets::Table#each_record`: Added.
+  * `Datasets::Table#find_record`: Added.
+### Thanks
+  * Yasuo Honda
+### Improvements
 ## 0.0.8 - 2019-03-24
 ### Improvements

data/lib/datasets.rb CHANGED

@@ -7,6 +7,7 @@ require_relative "datasets/iris"
 require_relative "datasets/libsvm"
 require_relative "datasets/libsvm-dataset-list"
 require_relative "datasets/mnist"
+require_relative "datasets/mushroom"
 require_relative "datasets/penn-treebank"
 require_relative "datasets/postal-code-japan"
 require_relative "datasets/wikipedia"

data/lib/datasets/dataset.rb CHANGED

@@ -34,5 +34,17 @@ module Datasets
       downloader = Downloader.new(url)
       downloader.download(output_path)
     end
+    def extract_bz2(path)
+      input, output = IO.pipe
+      pid = spawn("bzcat", path.to_s, {:out => output})
+      begin
+        output.close
+        yield(input)
+      ensure
+        input.close
+        Process.waitpid(pid)
+      end
+    end
   end
 end

data/lib/datasets/libsvm-dataset-list.rb CHANGED

@@ -1,5 +1,6 @@
-require "English"
-require "rexml/document"
+require "rexml/streamlistener"
+require "rexml/parsers/baseparser"
+require "rexml/parsers/streamparser"
 require_relative "dataset"
@@ -32,26 +33,17 @@ module Datasets
       end
     end
-    def each
+    def each(&block)
       return to_enum(__method__) unless block_given?
       open_data do |input|
-        # TODO: Improve performance
-        document = REXML::Document.new(input)
-        is_header = true
-        document.each_element("//tr") do |tr|
-          if is_header
-            is_header = false
-            next
+        catch do |abort_tag|
+          listener = IndexListener.new(abort_tag) do |href, record|
+            parse_detail(href, record)
+            yield(record)
           end
-          name = tr.elements.first
-          a = name.elements.first
-          href = a.attributes["href"]
-          record = Record.new
-          record.name = a.text
-          record.files = []
-          parse_detail(href, record)
-          yield(record)
+          parser = REXML::Parsers::StreamParser.new(input, listener)
+          parser.parse
         end
       end
     end
@@ -69,17 +61,11 @@ module Datasets
     def extract_description
       open_data do |input|
-        document = REXML::Document.new(input)
         description = []
-        in_content = false
-        document.each_element("//body/*") do |element|
-          unless in_content
-            in_content = (element.name == "h1")
-            next
-          end
-          break if element.name == "hr"
-          content = extract_text(element)
-          description << content unless content.empty?
+        catch do |abort_tag|
+          listener = DescriptionListener.new(abort_tag, description)
+          parser = REXML::Parsers::StreamParser.new(input, listener)
+          parser.parse
         end
         description.join("\n\n")
       end
@@ -102,36 +88,190 @@ module Datasets
     def parse_detail(href, record)
       path, id = href.split("#")
-      open_detail(path) do |detail|
-        detail_document = REXML::Document.new(detail)
-        anchor = REXML::XPath.match(detail_document, "//*[@name='#{id}']")[0]
-        ul = anchor.next_sibling
-        ul.each_element do |li|
-          text = extract_text(li)
-          case text
-          when /\ASource: /
-            record.source = $POSTMATCH
-          when /\APreprocessing: /
-            record.preprocessing = $POSTMATCH
-          when /\A\# of classes: (\d+)/
-            record.n_classes = Integer($1, 10)
-          when /\A\# of data: ([\d,]+)/
-            record.n_data = Integer($1.gsub(/,/, ""), 10)
-          when /\A\# of features: ([\d,]+)/
-            record.n_features = Integer($1.gsub(/,/, ""), 10)
-          when /\AFiles:/
-            li.elements.first.each_element do |file_li|
-              file_a = file_li.elements.first
-              file = File.new
-              file.name = file_a.text
-              file.url = @metadata.url + file_a.attributes["href"]
-              file_note = file_li.text
-              file.note = file_note.strip.gsub(/[()]/, "") if file_note
-              record.files << file
+      open_detail(path) do |input|
+        catch do |abort_tag|
+          listener = DetailListener.new(abort_tag, id, @metadata.url, record)
+          parser = REXML::Parsers::StreamParser.new(input, listener)
+          parser.parse
+        end
+      end
+    end
+    class IndexListener
+      include REXML::StreamListener
+      def initialize(abort_tag, &block)
+        @abort_tag = abort_tag
+        @block = block
+        @row = nil
+        @in_td = false
+      end
+      def tag_start(name, attributes)
+        case name
+        when "tr"
+          @row = []
+        when "td"
+          @in_td = true
+          @row << {:text => ""}
+        when "a"
+          @row.last[:href] = attributes["href"] if @in_td
+        end
+      end
+      def tag_end(name)
+        case name
+        when "table"
+          throw(@abort_tag)
+        when "tr"
+          name_column = @row[0]
+          return unless name_column
+          record = Record.new
+          record.name = name_column[:text]
+          record.files = []
+          @block.call(name_column[:href], record)
+        when "td"
+          @in_td = false
+        end
+      end
+      def text(data)
+        @row.last[:text] << data if @in_td
+      end
+    end
+    class DetailListener
+      include REXML::StreamListener
+      def initialize(abort_tag, id, base_url, record)
+        @abort_tag = abort_tag
+        @id = id
+        @base_url = base_url
+        @record = record
+        @in_target = false
+        @target_li_level = nil
+        @key = nil
+        @data = nil
+        @file = nil
+      end
+      def tag_start(name, attributes)
+        if @in_target
+          case name
+          when "li"
+            @target_li_level += 1
+            case @target_li_level
+            when 0
+              @key = nil
+              @data = nil
+              @file = nil
+            when 1
+              @file = File.new
             end
+          when "a"
+            @file.url = @base_url + attributes["href"] if @file
+          end
+        else
+          if attributes["name"] == @id
+            @in_target = true
+            @target_li_level = -1
+          end
+        end
+      end
+      def tag_end(name)
+        if @in_target
+          case name
+          when "ul"
+            throw(@abort_tag) if @target_li_level == -1
+          when "li"
+            case @target_li_level
+            when 0
+              if @key
+                data = @data
+                data = data.gsub(/[ \t\n]+/, " ").strip if data.is_a?(String)
+                @record[@key] = data
+              end
+            when 1
+              @data << @file if @data and @file
+            end
+            @target_li_level -= 1
+          end
+        end
+      end
+      def text(data)
+        case @target_li_level
+        when 0
+          if @key
+            @data << data
+          else
+            case data.gsub(/[ \t\n]+/, " ")
+            when /\ASource: /
+              @key = :source
+              @data = $POSTMATCH
+            when /\APreprocessing: /
+              @key = :preprocessing
+              @data = $POSTMATCH
+            when /\A\# of classes: (\d+)/
+              @key = :n_classes
+              @data = Integer($1, 10)
+            when /\A\# of data: ([\d,]+)/
+              @key = :n_data
+              @data = Integer($1.gsub(/,/, ""), 10)
+            when /\A\# of features: ([\d,]+)/
+              @key = :n_features
+              @data = Integer($1.gsub(/,/, ""), 10)
+            when /\AFiles:/
+              @key = :files
+              @data = []
+            end
+          end
+        when 1
+          if @file.name.nil?
+            @file.name = data
+          else
+            @file.note = data.strip.gsub(/[()]/, "")
           end
         end
       end
     end
+    class DescriptionListener
+      include REXML::StreamListener
+      def initialize(abort_tag, description)
+        @abort_tag = abort_tag
+        @description = description
+        @in_content = false
+        @p = nil
+      end
+      def tag_start(name, attributes)
+        case name
+        when "p"
+          @in_content = true
+          @p = []
+        when "br"
+          @description << @p.join(" ")
+          @p = []
+        when "hr"
+          throw(@abort_tag)
+        end
+      end
+      def tag_end(name)
+        case name
+        when "p"
+          @description << @p.join(" ")
+        end
+      end
+      def text(data)
+        return unless @in_content
+        content = data.gsub(/[ \t\n]+/, " ").strip
+        @p << content unless content.empty?
+      end
+    end
   end
 end

data/lib/datasets/libsvm.rb CHANGED

@@ -103,15 +103,7 @@ module Datasets
         download(data_path, @file.url)
       end
       if data_path.extname == ".bz2"
-        input, output = IO.pipe
-        pid = spawn("bzcat", data_path.to_s, {:out => output})
-        begin
-          output.close
-          yield(input)
-        ensure
-          input.close
-          Process.waitpid(pid)
-        end
+        extract_bz2(data_path, &block)
       else
         File.open(data_path, &block)
       end

data/lib/datasets/mushroom.rb ADDED

@@ -0,0 +1,256 @@
+require "csv"
+require_relative "dataset"
+module Datasets
+  class Mushroom < Dataset
+    Record = Struct.new(
+      :label,
+      :cap_shape,
+      :cap_surface,
+      :cap_color,
+      :bruises,
+      :odor,
+      :gill_attachment,
+      :gill_spacing,
+      :gill_size,
+      :gill_color,
+      :stalk_shape,
+      :stalk_root,
+      :stalk_surface_above_ring,
+      :stalk_surface_below_ring,
+      :stalk_color_above_ring,
+      :stalk_color_below_ring,
+      :veil_type,
+      :veil_color,
+      :n_rings,
+      :ring_type,
+      :spore_print_color,
+      :population,
+      :habitat,
+    )
+    def initialize
+      super()
+      @metadata.id = "mushroom"
+      @metadata.name = "Mushroom"
+      @metadata.url = "https://archive.ics.uci.edu/ml/datasets/mushroom"
+      @metadata.description = lambda do
+        read_names
+      end
+    end
+    def each
+      return to_enum(__method__) unless block_given?
+      open_data do |csv|
+        csv.each do |row|
+          next if row[0].nil?
+          record = Record.new(*row)
+          record.members.each do |member|
+            record[member] = CONVERTERS[member][record[member]]
+          end
+          yield(record)
+        end
+      end
+    end
+    private
+    def open_data
+      data_path = cache_dir_path + "agaricus-lepiota.data"
+      unless data_path.exist?
+        data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
+        download(data_path, data_url)
+      end
+      CSV.open(data_path) do |csv|
+        yield(csv)
+      end
+    end
+    def read_names
+      names_path = cache_dir_path + "agaricus-lepiota.names"
+      unless names_path.exist?
+        names_url = "https://archive.ics.uci.edu/ml/machine-learning-databases//mushroom/agaricus-lepiota.names"
+        download(names_path, names_url)
+      end
+      names_path.read
+    end
+    CONVERTERS = {
+      label: {
+        "p" => "poisonous",
+        "e" => "edible",
+      },
+      cap_shape: {
+        "b" => "bell",
+        "c" => "conical",
+        "x" => "convex",
+        "f" => "flat",
+        "k" => "knobbed",
+        "s" => "sunken",
+      },
+      cap_surface: {
+        "f" => "fibrous",
+        "g" => "grooves",
+        "y" => "scaly",
+        "s" => "smooth",
+      },
+      cap_color: {
+        "n" => "brown",
+        "b" => "buff",
+        "c" => "cinnamon",
+        "g" => "gray",
+        "r" => "green",
+        "p" => "pink",
+        "u" => "purple",
+        "e" => "red",
+        "w" => "white",
+        "y" => "yellow",
+      },
+      bruises: {
+        "t" => "bruises",
+        "f" => "no",
+      },
+      odor: {
+        "a" => "almond",
+        "l" => "anise",
+        "c" => "creosote",
+        "y" => "fishy",
+        "f" => "foul",
+        "m" => "musty",
+        "n" => "none",
+        "p" => "pungent",
+        "s" => "spicy",
+      },
+      gill_attachment: {
+        "a" => "attached",
+        "d" => "descending",
+        "f" => "free",
+        "n" => "notched",
+      },
+      gill_spacing: {
+        "c" => "close",
+        "w" => "crowded",
+        "d" => "distant",
+      },
+      gill_size: {
+        "b" => "broad",
+        "n" => "narrow",
+      },
+      gill_color: {
+        "k" => "black",
+        "n" => "brown",
+        "b" => "buff",
+        "h" => "chocolate",
+        "g" => "gray",
+        "r" => "green",
+        "o" => "orange",
+        "p" => "pink",
+        "u" => "purple",
+        "e" => "red",
+        "w" => "white",
+        "y" => "yellow",
+      },
+      stalk_shape: {
+        "e" => "enlarging",
+        "t" => "tapering",
+      },
+      stalk_root: {
+        "b" => "bulbous",
+        "c" => "club",
+        "u" => "cup",
+        "e" => "equal",
+        "z" => "rhizomorphs",
+        "r" => "rooted",
+        "?" => "missing",
+      },
+      stalk_surface_above_ring: {
+        "f" => "fibrous",
+        "y" => "scaly",
+        "k" => "silky",
+        "s" => "smooth",
+      },
+      stalk_surface_below_ring: {
+        "f" => "fibrous",
+        "y" => "scaly",
+        "k" => "silky",
+        "s" => "smooth",
+      },
+      stalk_color_above_ring: {
+        "n" => "brown",
+        "b" => "buff",
+        "c" => "cinnamon",
+        "g" => "gray",
+        "o" => "orange",
+        "p" => "pink",
+        "e" => "red",
+        "w" => "white",
+        "y" => "yellow",
+      },
+      stalk_color_below_ring: {
+        "n" => "brown",
+        "b" => "buff",
+        "c" => "cinnamon",
+        "g" => "gray",
+        "o" => "orange",
+        "p" => "pink",
+        "e" => "red",
+        "w" => "white",
+        "y" => "yellow",
+      },
+      veil_type: {
+        "p" => "partial",
+        "u" => "universal",
+      },
+      veil_color: {
+        "n" => "brown",
+        "o" => "orange",
+        "w" => "white",
+        "y" => "yellow",
+      },
+      n_rings: {
+        "n" => 0,
+        "o" => 1,
+        "t" => 2,
+      },
+      ring_type: {
+        "c" => "cobwebby",
+        "e" => "evanescent",
+        "f" => "flaring",
+        "l" => "large",
+        "n" => "none",
+        "p" => "pendant",
+        "s" => "sheathing",
+        "z" => "zone",
+      },
+      spore_print_color: {
+        "k" => "black",
+        "n" => "brown",
+        "b" => "buff",
+        "h" => "chocolate",
+        "r" => "green",
+        "o" => "orange",
+        "u" => "purple",
+        "w" => "white",
+        "y" => "yellow",
+      },
+      population: {
+        "a" => "abundant",
+        "c" => "clustered",
+        "n" => "numerous",
+        "s" => "scattered",
+        "v" => "several",
+        "y" => "solitary",
+      },
+      habitat: {
+        "g" => "grasses",
+        "l" => "leaves",
+        "m" => "meadows",
+        "p" => "paths",
+        "u" => "urban",
+        "w" => "waste",
+        "d" => "woods",
+      }
+    }
+  end
+end

data/lib/datasets/table.rb CHANGED

@@ -2,19 +2,99 @@ require "datasets/dictionary"
 module Datasets
   class Table
+    class Record
+      include Enumerable
+      def initialize(table, index)
+        @table = table
+        @index = index
+      end
+      def [](column_name_or_column_index)
+        @table[column_name_or_column_index][@index]
+      end
+      def each
+        return to_enum(__method__) unless block_given?
+        @table.each_column.each do |column_name, column_values|
+          yield(column_name, column_values[@index])
+        end
+      end
+      def values
+        @table.each_column.collect do |_column_name, column_values|
+          column_values[@index]
+        end
+      end
+      def to_h
+        hash = {}
+        each do |column_name, column_value|
+          hash[column_name] = column_value
+        end
+        hash
+      end
+      def inspect
+        "#<#{self.class.name} #{@table.dataset.metadata.name}[#{@index}] #{to_h.inspect}>"
+      end
+    end
     include Enumerable
+    attr_reader :dataset
     def initialize(dataset)
       @dataset = dataset
       @dictionaries = {}
     end
-    def each(&block)
+    def n_columns
+      columner_data.size
+    end
+    alias_method :size, :n_columns
+    alias_method :length, :n_columns
+    def n_rows
+      first_column = columner_data.first
+      return 0 if first_column.nil?
+      first_column[1].size
+    end
+    def column_names
+      columner_data.keys
+    end
+    def each_column(&block)
       columner_data.each(&block)
     end
+    alias_method :each, :each_column
-    def [](name)
-      columner_data[normalize_name(name)]
+    def each_record
+      return to_enum(__method__) unless block_given?
+      n_rows.times do |i|
+        yield(Record.new(self, i))
+      end
+    end
+    def find_record(row)
+      row += n_rows if row < 0
+      return nil if row < 0
+      return nil if row >= n_rows
+      Record.new(self, row)
+    end
+    def [](name_or_index)
+      case name_or_index
+      when Integer
+        index = name_or_index
+        columner_data.each_with_index do |(_name, values), i|
+          return values if i == index
+        end
+        nil
+      else
+        name = name_or_index
+        columner_data[normalize_name(name)]
+      end
     end
     def dictionary_encode(name)

data/lib/datasets/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Datasets
-  VERSION = "0.0.8"
+  VERSION = "0.0.9"
 end

data/lib/datasets/wikipedia.rb CHANGED

@@ -52,7 +52,7 @@ module Datasets
     end
     private
-    def open_data
+    def open_data(&block)
       base_name = "#{@language}wiki-latest-#{type_in_path}.xml.bz2"
       data_path = cache_dir_path + base_name
       unless data_path.exist?
@@ -60,15 +60,7 @@ module Datasets
         download(data_path, data_url)
       end
-      input, output = IO.pipe
-      pid = spawn("bzcat", data_path.to_s, {:out => output})
-      begin
-        output.close
-        yield(input)
-      ensure
-        input.close
-        Process.waitpid(pid)
-      end
+      extract_bz2(data_path, &block)
     end
     def type_in_path

data/test/test-mushroom.rb ADDED

@@ -0,0 +1,80 @@
+class MushroomTest < Test::Unit::TestCase
+  def setup
+    @dataset = Datasets::Mushroom.new
+  end
+  def record(*args)
+    Datasets::Mushroom::Record.new(*args)
+  end
+  test("#each") do
+    records = @dataset.each.to_a
+    assert_equal([
+                   8124,
+                   {
+                     :label => "poisonous",
+                     :cap_shape => "convex",
+                     :cap_surface => "smooth",
+                     :cap_color => "brown",
+                     :bruises => "bruises",
+                     :odor => "pungent",
+                     :gill_attachment => "free",
+                     :gill_spacing => "close",
+                     :gill_size => "narrow",
+                     :gill_color => "black",
+                     :stalk_shape => "enlarging",
+                     :stalk_root => "equal",
+                     :stalk_surface_above_ring => "smooth",
+                     :stalk_surface_below_ring => "smooth",
+                     :stalk_color_above_ring => "white",
+                     :stalk_color_below_ring => "white",
+                     :veil_type => "partial",
+                     :veil_color => "white",
+                     :n_rings => 1,
+                     :ring_type => "pendant",
+                     :spore_print_color => "black",
+                     :population => "scattered",
+                     :habitat => "urban"
+                   },
+                   {
+                     :label => "edible",
+                     :cap_shape => "convex",
+                     :cap_surface => "smooth",
+                     :cap_color => "brown",
+                     :bruises => "no",
+                     :odor => "none",
+                     :gill_attachment => "attached",
+                     :gill_spacing => "close",
+                     :gill_size => "broad",
+                     :gill_color => "yellow",
+                     :stalk_shape => "enlarging",
+                     :stalk_root => "missing",
+                     :stalk_surface_above_ring => "smooth",
+                     :stalk_surface_below_ring => "smooth",
+                     :stalk_color_above_ring => "orange",
+                     :stalk_color_below_ring => "orange",
+                     :veil_type => "partial",
+                     :veil_color => "orange",
+                     :n_rings => 1,
+                     :ring_type => "pendant",
+                     :spore_print_color => "orange",
+                     :population => "clustered",
+                     :habitat => "leaves"
+                   }
+                 ],
+                 [
+                   records.size,
+                   records[0].to_h,
+                   records[-1].to_h
+                 ])
+  end
+  sub_test_case("#metadata") do
+    test("#description") do
+      description = @dataset.metadata.description
+      assert do
+        description.start_with?("1. Title: Mushroom Database")
+      end
+    end
+  end
+end

data/test/test-table.rb CHANGED

@@ -3,9 +3,129 @@ class TableTest < Test::Unit::TestCase
     @table = Datasets::Iris.new.to_table
   end
-  test("#[]") do
-    assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
-                 @table[:petal_length].first(5))
+  test("#n_columns") do
+    assert_equal(5, @table.n_columns)
+  end
+  test("#n_rows") do
+    assert_equal(150, @table.n_rows)
+  end
+  test("#column_names") do
+    assert_equal([
+                   :sepal_length,
+                   :sepal_width,
+                   :petal_length,
+                   :petal_width,
+                   :label,
+                 ],
+                 @table.column_names)
+  end
+  test("#each") do
+    shorten_hash = {}
+    @table.each do |name, values|
+      shorten_hash[name] = values.first(5)
+    end
+    assert_equal({
+                   :label        => ["Iris-setosa"] * 5,
+                   :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
+                   :petal_width  => [0.2, 0.2, 0.2, 0.2, 0.2],
+                   :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
+                   :sepal_width  => [3.5, 3.0, 3.2, 3.1, 3.6],
+                 },
+                 shorten_hash)
+  end
+  test("#each_column") do
+    shorten_hash = {}
+    @table.each_column do |name, values|
+      shorten_hash[name] = values.first(5)
+    end
+    assert_equal({
+                   :label        => ["Iris-setosa"] * 5,
+                   :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
+                   :petal_width  => [0.2, 0.2, 0.2, 0.2, 0.2],
+                   :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
+                   :sepal_width  => [3.5, 3.0, 3.2, 3.1, 3.6],
+                 },
+                 shorten_hash)
+  end
+  test("#each_record") do
+    records = []
+    @table.each_record do |record|
+      records << record
+      break if records.size == 3
+    end
+    assert_equal([
+                   {
+                     label: "Iris-setosa",
+                     petal_length: 1.4,
+                     petal_width: 0.2,
+                     sepal_length: 5.1,
+                     sepal_width: 3.5,
+                   },
+                   {
+                     label: "Iris-setosa",
+                     petal_length: 1.4,
+                     petal_width: 0.2,
+                     sepal_length: 4.9,
+                     sepal_width: 3.0,
+                   },
+                   {
+                     label: "Iris-setosa",
+                     petal_length: 1.3,
+                     petal_width: 0.2,
+                     sepal_length: 4.7,
+                     sepal_width: 3.2,
+                   },
+                 ],
+                 records.collect(&:to_h))
+  end
+  sub_test_case("#find_record") do
+    test("positive") do
+      assert_equal({
+                     label: "Iris-setosa",
+                     petal_length: 1.4,
+                     petal_width: 0.2,
+                     sepal_length: 4.9,
+                     sepal_width: 3.0,
+                   },
+                   @table.find_record(1).to_h)
+    end
+    test("positive - over") do
+      assert_nil(@table.find_record(151))
+    end
+    test("negative") do
+      assert_equal({
+                     label: "Iris-virginica",
+                     petal_length: 5.1,
+                     petal_width: 1.8,
+                     sepal_length: 5.9,
+                     sepal_width: 3.0,
+                   },
+                   @table.find_record(-1).to_h)
+    end
+    test("negative - over") do
+      assert_nil(@table.find_record(-151))
+    end
+  end
+  sub_test_case("#[]") do
+    test("index") do
+      assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
+                   @table[2].first(5))
+    end
+    test("name") do
+      assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
+                   @table[:petal_length].first(5))
+    end
   end
   test("#dictionary_encode") do
@@ -58,21 +178,6 @@ class TableTest < Test::Unit::TestCase
     end
   end
-  test("#each") do
-    shorten_hash = {}
-    @table.each do |name, values|
-      shorten_hash[name] = values.first(5)
-    end
-    assert_equal({
-                   :label        => ["Iris-setosa"] * 5,
-                   :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
-                   :petal_width  => [0.2, 0.2, 0.2, 0.2, 0.2],
-                   :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
-                   :sepal_width  => [3.5, 3.0, 3.2, 3.1, 3.6],
-                 },
-                 shorten_hash)
-  end
   test("#to_h") do
     shorten_hash = {}
     @table.to_h.each do |name, values|

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: red-datasets
 version: !ruby/object:Gem::Version
-  version: 0.0.8
+  version: 0.0.9
 platform: ruby
 authors:
 - tomisuker
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-03-24 00:00:00.000000000 Z
+date: 2019-09-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: csv
@@ -138,6 +138,7 @@ files:
 - lib/datasets/libsvm.rb
 - lib/datasets/metadata.rb
 - lib/datasets/mnist.rb
+- lib/datasets/mushroom.rb
 - lib/datasets/penn-treebank.rb
 - lib/datasets/postal-code-japan.rb
 - lib/datasets/table.rb
@@ -155,6 +156,7 @@ files:
 - test/test-libsvm-dataset-list.rb
 - test/test-libsvm.rb
 - test/test-mnist.rb
+- test/test-mushroom.rb
 - test/test-penn-treebank.rb
 - test/test-postal-code-japan.rb
 - test/test-table.rb
@@ -180,23 +182,24 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.7.6
+rubygems_version: 2.7.6.2
 signing_key:
 specification_version: 4
 summary: Red Datasets provides classes that provide common datasets such as iris dataset.
 test_files:
-- test/test-iris.rb
-- test/test-wikipedia.rb
-- test/test-fashion-mnist.rb
-- test/test-wine.rb
-- test/test-postal-code-japan.rb
-- test/test-mnist.rb
-- test/helper.rb
 - test/test-adult.rb
 - test/test-libsvm.rb
-- test/run-test.rb
-- test/test-table.rb
-- test/test-cifar.rb
+- test/test-wikipedia.rb
 - test/test-libsvm-dataset-list.rb
+- test/helper.rb
+- test/test-iris.rb
+- test/test-table.rb
+- test/run-test.rb
+- test/test-wine.rb
 - test/test-penn-treebank.rb
+- test/test-postal-code-japan.rb
+- test/test-cifar.rb
+- test/test-mnist.rb
+- test/test-mushroom.rb
 - test/test-dictionary.rb
+- test/test-fashion-mnist.rb