red-datasets 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/Rakefile +10 -0
- data/doc/text/news.md +29 -0
- data/lib/datasets/california-housing.rb +1 -1
- data/lib/datasets/dataset.rb +2 -2
- data/lib/datasets/downloader.rb +34 -16
- data/lib/datasets/fashion-mnist.rb +6 -2
- data/lib/datasets/ggplot2-dataset.rb +3 -3
- data/lib/datasets/house-of-councillor.rb +169 -0
- data/lib/datasets/house-of-representative.rb +107 -0
- data/lib/datasets/japanese-date-parser.rb +38 -0
- data/lib/datasets/kuzushiji-mnist.rb +6 -2
- data/lib/datasets/lazy.rb +2 -0
- data/lib/datasets/libsvm-dataset-list.rb +1 -1
- data/lib/datasets/mnist.rb +12 -6
- data/lib/datasets/nagoya-university-conversation-corpus.rb +2 -2
- data/lib/datasets/postal-code-japan.rb +3 -3
- data/lib/datasets/quora-duplicate-question-pair.rb +1 -1
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets/wikipedia-kyoto-japanese-english.rb +2 -2
- data/lib/datasets/wikipedia.rb +2 -2
- data/test/japanese-date-parser-test.rb +27 -0
- data/test/test-adult.rb +36 -86
- data/test/test-aozora-bunko.rb +5 -5
- data/test/test-california-housing.rb +12 -31
- data/test/test-cldr-plurals.rb +1 -1
- data/test/test-diamonds.rb +13 -33
- data/test/test-downloader.rb +1 -1
- data/test/test-geolonia.rb +17 -41
- data/test/test-house-of-councillor.rb +223 -0
- data/test/test-house-of-representative.rb +54 -0
- data/test/test-nagoya-university-conversation-corpus.rb +17 -69
- data/test/test-postal-code-japan.rb +7 -0
- data/test/test-quora-duplicate-question-pair.rb +7 -21
- data/test/test-rdataset.rb +24 -22
- data/test/test-sudachi-synonym-dictionary.rb +12 -31
- data/test/test-wikipedia.rb +5 -5
- metadata +12 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: deff1c4d13294030c25c06691e272881eb049274decd9e2a958d0486c792ef26
|
4
|
+
data.tar.gz: e48151e045fbf343291a5f5770409dea7672ff39c75bcc617152b52a39890f31
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ae5a39a716bfa719f937c6ff139bd90ccf625e8a7ce72298507506f1a2c6a100e81c1273bf395a40d0ae8f1a13c1b1fb554e123e49dff718607a734ffd6c67b
|
7
|
+
data.tar.gz: 72e664ba5f2a569a92d9d4237588f92f7398621642486e4c9e6c930bade4c17e4a68d30da14cb277fcd602025f959c42284462bf4370872f9826c9634ff78aca
|
data/README.md
CHANGED
@@ -29,6 +29,8 @@ You can use datasets easily because you can access each dataset with multiple wa
|
|
29
29
|
* Fuel Economy Dataset
|
30
30
|
* Geolonia Japanese Addresses
|
31
31
|
* Hepatitis
|
32
|
+
* House of Councillors of Japan
|
33
|
+
* House of Representatives of Japan
|
32
34
|
* Iris Dataset
|
33
35
|
* Libsvm
|
34
36
|
* MNIST database
|
data/Rakefile
CHANGED
@@ -13,6 +13,16 @@ end
|
|
13
13
|
helper.install
|
14
14
|
spec = helper.gemspec
|
15
15
|
|
16
|
+
release_task = Rake.application["release"]
|
17
|
+
# We use Trusted Publishing.
|
18
|
+
release_task.prerequisites.delete("build")
|
19
|
+
release_task.prerequisites.delete("release:rubygem_push")
|
20
|
+
release_task_comment = release_task.comment
|
21
|
+
if release_task_comment
|
22
|
+
release_task.clear_comments
|
23
|
+
release_task.comment = release_task_comment.gsub(/ and build.*$/, "")
|
24
|
+
end
|
25
|
+
|
16
26
|
task default: :test
|
17
27
|
|
18
28
|
desc "Run tests"
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,34 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.1.8 - 2025-02-07
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Suppressed "literal string will be frozen" warnings.
|
8
|
+
|
9
|
+
* Patch by Tsutomu Katsube
|
10
|
+
|
11
|
+
* `Datasets::HouseOfCouncillor`: Added.
|
12
|
+
|
13
|
+
* [GH-143](https://github.com/red-data-tools/red-datasets/issues/143)
|
14
|
+
|
15
|
+
* [GH-181](https://github.com/red-data-tools/red-datasets/issues/181)
|
16
|
+
|
17
|
+
* Patch by Tsutomu Katsube
|
18
|
+
|
19
|
+
* `Datasets::HouseOfRepresentative`: Added.
|
20
|
+
|
21
|
+
* [GH-142](https://github.com/red-data-tools/red-datasets/issues/142)
|
22
|
+
|
23
|
+
* [GH-184](https://github.com/red-data-tools/red-datasets/issues/184)
|
24
|
+
|
25
|
+
* Patch by Tsutomu Katsube
|
26
|
+
|
27
|
+
### Thanks
|
28
|
+
|
29
|
+
* Tsutomu Katsube
|
30
|
+
|
31
|
+
|
3
32
|
## 0.1.7 - 2023-05-29
|
4
33
|
|
5
34
|
### Improvements
|
@@ -36,7 +36,7 @@ Available from http://lib.stat.cmu.edu/datasets/.
|
|
36
36
|
file_name = "cadata.txt"
|
37
37
|
download(data_path, data_url)
|
38
38
|
open_data(data_path, file_name) do |input|
|
39
|
-
data = ""
|
39
|
+
data = +""
|
40
40
|
input.each_line do |line|
|
41
41
|
next unless line.start_with?(" ")
|
42
42
|
data << line.lstrip.gsub(/ +/, ",")
|
data/lib/datasets/dataset.rb
CHANGED
@@ -33,8 +33,8 @@ module Datasets
|
|
33
33
|
@cache_path ||= CachePath.new(@metadata.id)
|
34
34
|
end
|
35
35
|
|
36
|
-
def download(output_path, url, &block)
|
37
|
-
downloader = Downloader.new(url)
|
36
|
+
def download(output_path, url, *fallback_urls, &block)
|
37
|
+
downloader = Downloader.new(url, *fallback_urls)
|
38
38
|
downloader.download(output_path, &block)
|
39
39
|
end
|
40
40
|
|
data/lib/datasets/downloader.rb
CHANGED
@@ -6,20 +6,15 @@ end
|
|
6
6
|
require "net/http"
|
7
7
|
require "pathname"
|
8
8
|
|
9
|
+
require_relative "error"
|
10
|
+
|
9
11
|
module Datasets
|
10
12
|
class Downloader
|
11
|
-
class TooManyRedirects <
|
13
|
+
class TooManyRedirects < Error; end
|
12
14
|
|
13
|
-
def initialize(url)
|
14
|
-
|
15
|
-
|
16
|
-
else
|
17
|
-
url = URI.parse(url)
|
18
|
-
end
|
19
|
-
@url = url
|
20
|
-
unless @url.is_a?(URI::HTTP)
|
21
|
-
raise ArgumentError, "download URL must be HTTP or HTTPS: <#{@url}>"
|
22
|
-
end
|
15
|
+
def initialize(url, *fallback_urls)
|
16
|
+
@url = normalize_url(url)
|
17
|
+
@fallback_urls = fallback_urls.collect { |fallback_url| normalize_url(fallback_url) }
|
23
18
|
end
|
24
19
|
|
25
20
|
def download(output_path, &block)
|
@@ -45,7 +40,7 @@ module Datasets
|
|
45
40
|
headers["Range"] = "bytes=#{start}-"
|
46
41
|
end
|
47
42
|
|
48
|
-
start_http(@url, headers) do |response|
|
43
|
+
start_http(@url, @fallback_urls, headers) do |response|
|
49
44
|
if response.is_a?(Net::HTTPPartialContent)
|
50
45
|
mode = "ab"
|
51
46
|
else
|
@@ -85,6 +80,18 @@ module Datasets
|
|
85
80
|
end
|
86
81
|
end
|
87
82
|
|
83
|
+
private def normalize_url(url)
|
84
|
+
if url.is_a?(URI::Generic)
|
85
|
+
url = url.dup
|
86
|
+
else
|
87
|
+
url = URI.parse(url)
|
88
|
+
end
|
89
|
+
unless url.is_a?(URI::HTTP)
|
90
|
+
raise ArgumentError, "download URL must be HTTP or HTTPS: <#{url}>"
|
91
|
+
end
|
92
|
+
url
|
93
|
+
end
|
94
|
+
|
88
95
|
private def synchronize(output_path, partial_output_path)
|
89
96
|
begin
|
90
97
|
Process.getpgid(Process.pid)
|
@@ -104,7 +111,8 @@ module Datasets
|
|
104
111
|
rescue ArgumentError
|
105
112
|
# The process that acquired the lock will be exited before
|
106
113
|
# it stores its process ID.
|
107
|
-
|
114
|
+
elapsed_time = Time.now - lock_path.mtime
|
115
|
+
valid_lock_path = (elapsed_time > 10)
|
108
116
|
else
|
109
117
|
begin
|
110
118
|
Process.getpgid(pid)
|
@@ -133,7 +141,7 @@ module Datasets
|
|
133
141
|
end
|
134
142
|
end
|
135
143
|
|
136
|
-
private def start_http(url, headers, limit = 10, &block)
|
144
|
+
private def start_http(url, fallback_urls, headers, limit = 10, &block)
|
137
145
|
if limit == 0
|
138
146
|
raise TooManyRedirects, "too many redirections: #{url}"
|
139
147
|
end
|
@@ -151,8 +159,18 @@ module Datasets
|
|
151
159
|
when Net::HTTPRedirection
|
152
160
|
url = URI.parse(response[:location])
|
153
161
|
$stderr.puts "Redirect to #{url}"
|
154
|
-
return start_http(url, headers, limit - 1, &block)
|
162
|
+
return start_http(url, fallback_urls, headers, limit - 1, &block)
|
155
163
|
else
|
164
|
+
if response.is_a?(Net::HTTPForbidden)
|
165
|
+
next_url, *rest_fallback_urls = fallback_urls
|
166
|
+
if next_url
|
167
|
+
message = "#{response.code}: #{response.message}: " +
|
168
|
+
"fallback: <#{url}> -> <#{next_url}>"
|
169
|
+
$stderr.puts(message)
|
170
|
+
return start_http(next_url, rest_fallback_urls, headers, &block)
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
156
174
|
message = response.code
|
157
175
|
if response.message and not response.message.empty?
|
158
176
|
message += ": #{response.message}"
|
@@ -167,7 +185,7 @@ module Datasets
|
|
167
185
|
private def yield_chunks(path)
|
168
186
|
path.open("rb") do |output|
|
169
187
|
chunk_size = 1024 * 1024
|
170
|
-
chunk = ""
|
188
|
+
chunk = +""
|
171
189
|
while output.read(chunk_size, chunk)
|
172
190
|
yield(chunk)
|
173
191
|
end
|
@@ -2,9 +2,13 @@ require_relative 'mnist'
|
|
2
2
|
|
3
3
|
module Datasets
|
4
4
|
class FashionMNIST < MNIST
|
5
|
-
BASE_URL = "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/"
|
6
|
-
|
7
5
|
private
|
6
|
+
def base_urls
|
7
|
+
[
|
8
|
+
"http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/",
|
9
|
+
]
|
10
|
+
end
|
11
|
+
|
8
12
|
def dataset_name
|
9
13
|
"Fashion-MNIST"
|
10
14
|
end
|
@@ -17,7 +17,7 @@ module Datasets
|
|
17
17
|
data_path = cache_dir_path + data_base_name
|
18
18
|
data_url = "#{download_base_url}/data-raw/#{data_base_name}"
|
19
19
|
download(data_path, data_url)
|
20
|
-
CSV.open(data_path, headers: :first_row, converters: :
|
20
|
+
CSV.open(data_path, headers: :first_row, converters: :numeric) do |csv|
|
21
21
|
record_class = self.class::Record
|
22
22
|
csv.each do |row|
|
23
23
|
record = record_class.new(*row.fields)
|
@@ -37,7 +37,7 @@ module Datasets
|
|
37
37
|
data_r_url = "#{download_base_url}/R/#{data_r_base_name}"
|
38
38
|
download(data_r_path, data_r_url)
|
39
39
|
descriptions = {}
|
40
|
-
comment = ""
|
40
|
+
comment = +""
|
41
41
|
File.open(data_r_path) do |data_r|
|
42
42
|
data_r.each_line do |line|
|
43
43
|
case line.chomp
|
@@ -51,7 +51,7 @@ module Datasets
|
|
51
51
|
when /\A"(.+)"\z/
|
52
52
|
name = Regexp.last_match[1]
|
53
53
|
descriptions[name] = parse_roxygen(comment.rstrip)
|
54
|
-
comment = ""
|
54
|
+
comment = +""
|
55
55
|
end
|
56
56
|
end
|
57
57
|
descriptions[@ggplot2_dataset_name]
|
@@ -0,0 +1,169 @@
|
|
1
|
+
require_relative "dataset"
|
2
|
+
|
3
|
+
module Datasets
|
4
|
+
class HouseOfCouncillor < Dataset
|
5
|
+
Bill = Struct.new(:council_time,
|
6
|
+
:bill_type,
|
7
|
+
:submit_time,
|
8
|
+
:submit_number,
|
9
|
+
:title,
|
10
|
+
:bill_url,
|
11
|
+
:bill_summary_url,
|
12
|
+
:proposed_bill_url,
|
13
|
+
:proposed_on,
|
14
|
+
:proposed_on_from_house_of_representatives,
|
15
|
+
:proposed_on_to_house_of_representatives,
|
16
|
+
:prior_deliberations_type,
|
17
|
+
:continuation_type,
|
18
|
+
:proposers,
|
19
|
+
:submitter,
|
20
|
+
:submitter_type,
|
21
|
+
:progress_of_house_of_councillors_committees_etc_refer_on,
|
22
|
+
:progress_of_house_of_councillors_committees_etc_committee_etc,
|
23
|
+
:progress_of_house_of_councillors_committees_etc_pass_on,
|
24
|
+
:progress_of_house_of_councillors_committees_etc_result,
|
25
|
+
:progress_of_house_of_councillors_plenary_sitting_pass_on,
|
26
|
+
:progress_of_house_of_councillors_plenary_sitting_result,
|
27
|
+
:progress_of_house_of_councillors_plenary_sitting_committees,
|
28
|
+
:progress_of_house_of_councillors_plenary_sitting_vote_type,
|
29
|
+
:progress_of_house_of_councillors_plenary_sitting_vote_method,
|
30
|
+
:progress_of_house_of_councillors_plenary_sitting_result_url,
|
31
|
+
:progress_of_house_of_representatives_committees_etc_refer_on,
|
32
|
+
:progress_of_house_of_representatives_committees_etc_committee_etc,
|
33
|
+
:progress_of_house_of_representatives_committees_etc_pass_on,
|
34
|
+
:progress_of_house_of_representatives_committees_etc_result,
|
35
|
+
:progress_of_house_of_representatives_plenary_sitting_pass_on,
|
36
|
+
:progress_of_house_of_representatives_plenary_sitting_result,
|
37
|
+
:progress_of_house_of_representatives_plenary_sitting_committees,
|
38
|
+
:progress_of_house_of_representatives_plenary_sitting_vote_type,
|
39
|
+
:progress_of_house_of_representatives_plenary_sitting_vote_method,
|
40
|
+
:promulgated_on,
|
41
|
+
:law_number,
|
42
|
+
:entracted_law_url,
|
43
|
+
:notes)
|
44
|
+
|
45
|
+
InHouseGroup = Struct.new(:in_house_group_name_and_abbreviation_on,
|
46
|
+
:in_house_group_name,
|
47
|
+
:in_house_group_abbreviation,
|
48
|
+
:number_of_members_on,
|
49
|
+
:number_of_members,
|
50
|
+
:number_of_women_members,
|
51
|
+
:first_term_expires_on,
|
52
|
+
:first_term_proportional_representation_number_of_members,
|
53
|
+
:first_term_proportional_representation_number_of_women_members,
|
54
|
+
:first_term_election_district_number_of_members,
|
55
|
+
:first_term_election_district_number_of_women_members,
|
56
|
+
:first_term_total_number_of_members,
|
57
|
+
:first_term_total_number_of_women_members,
|
58
|
+
:second_term_expires_on,
|
59
|
+
:second_term_proportional_representation_number_of_members,
|
60
|
+
:second_term_proportional_representation_number_of_women_members,
|
61
|
+
:second_term_election_district_number_of_members,
|
62
|
+
:second_term_election_district_number_of_women_members,
|
63
|
+
:second_term_total_number_of_members,
|
64
|
+
:second_term_total_number_of_women_members)
|
65
|
+
|
66
|
+
Member = Struct.new(:professional_name,
|
67
|
+
:true_name,
|
68
|
+
:profile_url,
|
69
|
+
:professional_name_reading,
|
70
|
+
:in_house_group_abbreviation,
|
71
|
+
:constituency,
|
72
|
+
:expiration_of_term,
|
73
|
+
:photo_url,
|
74
|
+
:elected_years,
|
75
|
+
:elected_number,
|
76
|
+
:responsibilities,
|
77
|
+
:responsibility_on,
|
78
|
+
:career,
|
79
|
+
:career_on)
|
80
|
+
|
81
|
+
Question = Struct.new(:submit_time,
|
82
|
+
:submit_number,
|
83
|
+
:title,
|
84
|
+
:submitter,
|
85
|
+
:number_of_submissions,
|
86
|
+
:question_for_text_html_url,
|
87
|
+
:answer_for_text_html_url,
|
88
|
+
:question_for_text_pdf_url,
|
89
|
+
:answer_for_text_pdf_url,
|
90
|
+
:question_url,
|
91
|
+
:submitted_on,
|
92
|
+
:transfered_on,
|
93
|
+
:received_answer_on,
|
94
|
+
:notes)
|
95
|
+
|
96
|
+
VALID_TYPES = [
|
97
|
+
:bill,
|
98
|
+
:in_house_group,
|
99
|
+
:member,
|
100
|
+
:question
|
101
|
+
]
|
102
|
+
|
103
|
+
def initialize(type: :bill)
|
104
|
+
super()
|
105
|
+
@type = type
|
106
|
+
unless VALID_TYPES.include?(type)
|
107
|
+
message = +":type must be one of ["
|
108
|
+
message << VALID_TYPES.collect(&:inspect).join(", ")
|
109
|
+
message << "]: #{@type.inspect}"
|
110
|
+
raise ArgumentError, message
|
111
|
+
end
|
112
|
+
|
113
|
+
@metadata.id = "house-of-councillor"
|
114
|
+
@metadata.name = "Bill, in-House group, member and question of the House of Councillors of Japan"
|
115
|
+
@metadata.url = "https://smartnews-smri.github.io/house-of-councillors"
|
116
|
+
@metadata.licenses = ["MIT"]
|
117
|
+
@metadata.description = "The House of Councillors of Japan (type: #{@type})"
|
118
|
+
end
|
119
|
+
|
120
|
+
def each
|
121
|
+
return to_enum(__method__) unless block_given?
|
122
|
+
|
123
|
+
open_data do |csv|
|
124
|
+
csv.each do |row|
|
125
|
+
case @type
|
126
|
+
when :bill
|
127
|
+
record = Bill.new(*row.fields)
|
128
|
+
when :in_house_group
|
129
|
+
record = InHouseGroup.new(*row.fields)
|
130
|
+
when :member
|
131
|
+
%w(当選年).each do |ints_column_name|
|
132
|
+
row[ints_column_name] = parse_ints(row[ints_column_name])
|
133
|
+
end
|
134
|
+
record = Member.new(*row.fields)
|
135
|
+
when :question
|
136
|
+
record = Question.new(*row.fields)
|
137
|
+
end
|
138
|
+
yield(record)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
private
|
144
|
+
|
145
|
+
def open_data
|
146
|
+
data_url = +"https://smartnews-smri.github.io/house-of-councillors/data"
|
147
|
+
case @type
|
148
|
+
when :bill
|
149
|
+
data_url << "/gian.csv"
|
150
|
+
when :in_house_group
|
151
|
+
data_url << "/kaiha.csv"
|
152
|
+
when :member
|
153
|
+
data_url << "/giin.csv"
|
154
|
+
when :question
|
155
|
+
data_url << "/syuisyo.csv"
|
156
|
+
end
|
157
|
+
data_path = cache_dir_path + "#{@type}.csv"
|
158
|
+
download(data_path, data_url)
|
159
|
+
|
160
|
+
CSV.open(data_path, col_sep: ",", headers: true, converters: %i(date integer)) do |csv|
|
161
|
+
yield(csv)
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
def parse_ints(column_value)
|
166
|
+
column_value.to_s.split("、").collect(&:to_i)
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require_relative "dataset"
|
2
|
+
require_relative "japanese-date-parser"
|
3
|
+
|
4
|
+
module Datasets
|
5
|
+
class HouseOfRepresentative < Dataset
|
6
|
+
Record = Struct.new(:carry_time,
|
7
|
+
:caption,
|
8
|
+
:type,
|
9
|
+
:submit_time,
|
10
|
+
:submit_number,
|
11
|
+
:title,
|
12
|
+
:discussion_status,
|
13
|
+
:progress,
|
14
|
+
:progress_url,
|
15
|
+
:text,
|
16
|
+
:text_url,
|
17
|
+
:bill_type,
|
18
|
+
:submitter,
|
19
|
+
:submitter_in_house_groups,
|
20
|
+
:house_of_representatives_of_accepted_bill_on_preliminary_consideration,
|
21
|
+
:house_of_representatives_of_preliminary_refer_on,
|
22
|
+
:house_of_representatives_of_preliminary_refer_commission,
|
23
|
+
:house_of_representatives_of_accepted_bill_on,
|
24
|
+
:house_of_representatives_of_refer_on,
|
25
|
+
:house_of_representatives_of_refer_commission,
|
26
|
+
:house_of_representatives_of_finished_consideration_on,
|
27
|
+
:house_of_representatives_of_consideration_result,
|
28
|
+
:house_of_representatives_of_finished_deliberation_on,
|
29
|
+
:house_of_representatives_of_deliberation_result,
|
30
|
+
:house_of_representatives_of_attitude_of_in_house_group_during_deliberation,
|
31
|
+
:house_of_representatives_of_support_in_house_group_during_deliberation,
|
32
|
+
:house_of_representatives_of_opposition_in_house_group_during_deliberation,
|
33
|
+
:house_of_councillors_of_accepted_bill_on_preliminary_consideration,
|
34
|
+
:house_of_councillors_of_preliminary_refer_on,
|
35
|
+
:house_of_councillors_of_preliminary_refer_commission,
|
36
|
+
:house_of_councillors_of_accepted_bill_on,
|
37
|
+
:house_of_councillors_of_refer_on,
|
38
|
+
:house_of_councillors_of_refer_commission,
|
39
|
+
:house_of_councillors_of_finished_consideration_on,
|
40
|
+
:house_of_councillors_of_consideration_result,
|
41
|
+
:house_of_councillors_of_finished_deliberation_on,
|
42
|
+
:house_of_councillors_of_deliberation_result,
|
43
|
+
:promulgated_on,
|
44
|
+
:law_number,
|
45
|
+
:submitters,
|
46
|
+
:supporters_of_submitted_bill)
|
47
|
+
|
48
|
+
def initialize
|
49
|
+
super()
|
50
|
+
|
51
|
+
@metadata.id = "house-of-representative"
|
52
|
+
@metadata.name = "Bill of the House of Representatives of Japan"
|
53
|
+
@metadata.url = "https://smartnews-smri.github.io/house-of-representatives"
|
54
|
+
@metadata.licenses = ["MIT"]
|
55
|
+
@metadata.description = "Bill of the House of Representatives of Japan"
|
56
|
+
end
|
57
|
+
|
58
|
+
def each
|
59
|
+
return to_enum(__method__) unless block_given?
|
60
|
+
|
61
|
+
open_data do |csv|
|
62
|
+
csv.each do |row|
|
63
|
+
record = Record.new(*row.fields)
|
64
|
+
yield(record)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
private
|
70
|
+
|
71
|
+
def open_data
|
72
|
+
data_url = "https://raw.githubusercontent.com/smartnews-smri/house-of-representatives/main/data/gian.csv"
|
73
|
+
data_path = cache_dir_path + "gian.csv"
|
74
|
+
download(data_path, data_url)
|
75
|
+
|
76
|
+
parser = JapaneseDateParser.new
|
77
|
+
japanese_date_converter = lambda do |field, info|
|
78
|
+
if info.header.end_with?("年月日")
|
79
|
+
parser.parse(field)
|
80
|
+
else
|
81
|
+
field
|
82
|
+
end
|
83
|
+
end
|
84
|
+
array_converter = lambda do |field, info|
|
85
|
+
case info.header
|
86
|
+
when "議案提出会派", "衆議院審議時賛成会派", "衆議院審議時反対会派", "議案提出者一覧", "議案提出の賛成者"
|
87
|
+
parse_array(field)
|
88
|
+
else
|
89
|
+
field
|
90
|
+
end
|
91
|
+
end
|
92
|
+
File.open(data_path) do |data_file|
|
93
|
+
options = {
|
94
|
+
col_sep: ",",
|
95
|
+
headers: true,
|
96
|
+
converters: [:integer, japanese_date_converter, array_converter],
|
97
|
+
}
|
98
|
+
# There are two columns within one column. To split into two columns, `#gsub` is necessary.
|
99
|
+
yield(CSV.new(data_file.read.gsub("/", ","), **options))
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def parse_array(column_value)
|
104
|
+
column_value&.split("; ")
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Datasets
|
2
|
+
class JapaneseDateParser
|
3
|
+
class UnsupportedEraInitialRange < Error; end
|
4
|
+
|
5
|
+
ERA_INITIALS = {
|
6
|
+
"平成" => "H",
|
7
|
+
"令和" => "R",
|
8
|
+
}.freeze
|
9
|
+
|
10
|
+
def parse(string)
|
11
|
+
case string
|
12
|
+
when nil
|
13
|
+
nil
|
14
|
+
when /\A(平成|令和|..)\s*(\d{1,2}|元)年\s*(\d{1,2})月\s*(\d{1,2})日\z/
|
15
|
+
match_data = Regexp.last_match
|
16
|
+
era_initial = ERA_INITIALS[match_data[1]]
|
17
|
+
if era_initial.nil?
|
18
|
+
message = +"era must be one of ["
|
19
|
+
message << ERA_INITIALS.keys.join(", ")
|
20
|
+
message << "]: #{match_data[1]}"
|
21
|
+
raise UnsupportedEraInitialRange, message
|
22
|
+
end
|
23
|
+
|
24
|
+
year = match_data[2]
|
25
|
+
if year == "元"
|
26
|
+
year = "01"
|
27
|
+
else
|
28
|
+
year = year.rjust(2, "0")
|
29
|
+
end
|
30
|
+
month = match_data[3].rjust(2, "0")
|
31
|
+
day = match_data[4].rjust(2, "0")
|
32
|
+
Date.jisx0301("#{era_initial}#{year}.#{month}.#{day}")
|
33
|
+
else
|
34
|
+
string
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -2,9 +2,13 @@ require_relative 'mnist'
|
|
2
2
|
|
3
3
|
module Datasets
|
4
4
|
class KuzushijiMNIST < MNIST
|
5
|
-
BASE_URL = "http://codh.rois.ac.jp/kmnist/dataset/kmnist/"
|
6
|
-
|
7
5
|
private
|
6
|
+
def base_urls
|
7
|
+
[
|
8
|
+
"http://codh.rois.ac.jp/kmnist/dataset/kmnist/",
|
9
|
+
]
|
10
|
+
end
|
11
|
+
|
8
12
|
def dataset_name
|
9
13
|
"Kuzushiji-MNIST"
|
10
14
|
end
|
data/lib/datasets/lazy.rb
CHANGED
@@ -57,6 +57,8 @@ module Datasets
|
|
57
57
|
LAZY_LOADER.register(:FuelEconomy, "datasets/fuel-economy")
|
58
58
|
LAZY_LOADER.register(:Geolonia, "datasets/geolonia")
|
59
59
|
LAZY_LOADER.register(:Hepatitis, "datasets/hepatitis")
|
60
|
+
LAZY_LOADER.register(:HouseOfCouncillor, "datasets/house-of-councillor")
|
61
|
+
LAZY_LOADER.register(:HouseOfRepresentative, "datasets/house-of-representative")
|
60
62
|
LAZY_LOADER.register(:Iris, "datasets/iris")
|
61
63
|
LAZY_LOADER.register(:ITACorpus, "datasets/ita-corpus")
|
62
64
|
LAZY_LOADER.register(:KuzushijiMNIST, "datasets/kuzushiji-mnist")
|
data/lib/datasets/mnist.rb
CHANGED
@@ -4,8 +4,6 @@ require_relative "dataset"
|
|
4
4
|
|
5
5
|
module Datasets
|
6
6
|
class MNIST < Dataset
|
7
|
-
BASE_URL = "http://yann.lecun.com/exdb/mnist/"
|
8
|
-
|
9
7
|
class Record < Struct.new(:data, :label)
|
10
8
|
def pixels
|
11
9
|
data.unpack("C*")
|
@@ -27,7 +25,7 @@ module Datasets
|
|
27
25
|
|
28
26
|
@metadata.id = "#{dataset_name.downcase}-#{type}"
|
29
27
|
@metadata.name = "#{dataset_name}: #{type}"
|
30
|
-
@metadata.url =
|
28
|
+
@metadata.url = base_urls.first
|
31
29
|
@metadata.licenses = licenses
|
32
30
|
@type = type
|
33
31
|
|
@@ -44,15 +42,23 @@ module Datasets
|
|
44
42
|
|
45
43
|
image_path = cache_dir_path + target_file(:image)
|
46
44
|
label_path = cache_dir_path + target_file(:label)
|
47
|
-
base_url = self.class::BASE_URL
|
48
45
|
|
49
|
-
download(image_path,
|
50
|
-
|
46
|
+
download(image_path,
|
47
|
+
*base_urls.collect { |base_url| base_url + target_file(:image) })
|
48
|
+
download(label_path,
|
49
|
+
*base_urls.collect { |base_url| base_url + target_file(:label) })
|
51
50
|
|
52
51
|
open_data(image_path, label_path, &block)
|
53
52
|
end
|
54
53
|
|
55
54
|
private
|
55
|
+
def base_urls
|
56
|
+
[
|
57
|
+
"http://yann.lecun.com/exdb/mnist/",
|
58
|
+
"https://ossci-datasets.s3.amazonaws.com/mnist/",
|
59
|
+
]
|
60
|
+
end
|
61
|
+
|
56
62
|
def licenses
|
57
63
|
[]
|
58
64
|
end
|
@@ -28,8 +28,8 @@ module Datasets
|
|
28
28
|
|
29
29
|
def initialize
|
30
30
|
super()
|
31
|
-
@metadata.id = 'nagoya-university-conversation-
|
32
|
-
@metadata.name = 'Nagoya University Conversation
|
31
|
+
@metadata.id = 'nagoya-university-conversation-corpus'
|
32
|
+
@metadata.name = 'Nagoya University Conversation Corpus'
|
33
33
|
@metadata.url = 'https://mmsrv.ninjal.ac.jp/nucc/'
|
34
34
|
@metadata.licenses = ['CC-BY-NC-ND-4.0']
|
35
35
|
@metadata.description = <<~DESCRIPTION
|
@@ -41,7 +41,7 @@ module Datasets
|
|
41
41
|
super()
|
42
42
|
@reading = reading
|
43
43
|
unless VALID_READINGS.include?(@reading)
|
44
|
-
message = ":reading must be one of ["
|
44
|
+
message = +":reading must be one of ["
|
45
45
|
message << VALID_READINGS.collect(&:inspect).join(", ")
|
46
46
|
message << "]: #{@reading.inspect}"
|
47
47
|
raise ArgumentError, message
|
@@ -104,14 +104,14 @@ module Datasets
|
|
104
104
|
|
105
105
|
private
|
106
106
|
def open_data
|
107
|
-
data_url = "https://www.post.japanpost.jp/zipcode/dl"
|
107
|
+
data_url = +"https://www.post.japanpost.jp/zipcode/dl"
|
108
108
|
case @reading
|
109
109
|
when :lowercase
|
110
110
|
data_url << "/kogaki/zip/ken_all.zip"
|
111
111
|
when :uppercase
|
112
112
|
data_url << "/oogaki/zip/ken_all.zip"
|
113
113
|
when :romaji
|
114
|
-
data_url << "/roman/
|
114
|
+
data_url << "/roman/KEN_ALL_ROME.zip"
|
115
115
|
end
|
116
116
|
data_path = cache_dir_path + "#{@reading}-ken-all.zip"
|
117
117
|
download(data_path, data_url)
|