red-datasets 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: '0239c4ab86dd9f589b1f67b9d6c381570e25a29289c261470943ed48f7dfc3d0'
-   data.tar.gz: 2f3f3af1f17a1bd1e7aa307e2b182108790549754d907262105e18479997cde6
+   metadata.gz: 6fbd4d11063f89ba2e09250b751886086c953ec8bc92c75a6a351c31a36da0c4
+   data.tar.gz: acc6ff31f0f4ae3a6c6565fe569233c01615718c01300b0838ff744571edc34d
  SHA512:
-   metadata.gz: 04b3dbc23dc8679855a6104a9f3da39871594979f149295ef13b3be864a3dbbdb6bec3fb59153db9b5be4fade6819686e13b60a38f1d1721bf7e1163d4bb49b8
-   data.tar.gz: 476a9081fe0db32aad8a4e00c7e08f77002e58a2f2c68eb37aecf2a70054d43877707ddda29137b361a5a37ff979f3c28c5f2a03c2d1e96cbc7f7289f659ba9f
+   metadata.gz: 26361511155b447ffed56a79b2336a9a1db96494bf856b23e7b39cc6a8b6a2039e7ed27564140761bdb2daaae7ee563b3695c464a7a7b21ff93b0636f6b8338d
+   data.tar.gz: 40446f90e410e0d86abeec186a1d7adcc5375e29c19dc934f823befb26a87d904458ef5ea18c9d64055493d29ed305dba53d6e4d86bd7d84488baf3745ebd792
data/doc/text/news.md CHANGED
@@ -1,5 +1,14 @@
  # News

+ ## 0.1.2 - 2021-06-03
+
+ ### Improvements
+
+ * `Datasets::Rdatasets` and `Datasets::RdatasetsList`: Added.
+
+ * `Datasets::Penguins`: Changed for compatibility with seaborn's
+   penguins dataset.
+
  ## 0.1.1 - 2021-04-11

  ### Improvements
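The two additions called out above can be exercised roughly like this (a minimal sketch based on the classes introduced in this release; the package and dataset names are only examples):

```ruby
require "datasets"

# Browse the Rdatasets index, optionally filtered by R package name.
Datasets::RdatasetsList.new.filter(package: "datasets") do |entry|
  puts "#{entry.package}/#{entry.dataset}: #{entry.title}"
end

# Load a single dataset; each yielded record is a Hash keyed by the CSV column names.
Datasets::Rdatasets.new("datasets", "AirPassengers").each do |record|
  p record # e.g. {time: 1949, value: 112}
end
```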
data/lib/datasets.rb CHANGED
@@ -15,5 +15,6 @@ require_relative "datasets/mushroom"
  require_relative "datasets/penguins"
  require_relative "datasets/penn-treebank"
  require_relative "datasets/postal-code-japan"
+ require_relative "datasets/rdatasets"
  require_relative "datasets/wikipedia"
  require_relative "datasets/wine"
data/lib/datasets/cifar.rb CHANGED
@@ -1,10 +1,10 @@
- require "rubygems/package"
- require "zlib"
-
+ require_relative "tar_gz_readable"
  require_relative "dataset"

  module Datasets
    class CIFAR < Dataset
+     include TarGzReadable
+
      module Pixelable
        def pixels
          data.unpack("C*")
@@ -61,7 +61,7 @@ module Datasets
      private

      def parse_data(data_path, &block)
-       open_tar(data_path) do |tar|
+       open_tar_gz(data_path) do |tar|
          target_file_names.each do |target_file_name|
            tar.seek(target_file_name) do |entry|
              parse_entry(entry, &block)
@@ -124,14 +124,6 @@ module Datasets
            end
          end
        end
-
-     def open_tar(data_path)
-       Zlib::GzipReader.open(data_path) do |f|
-         Gem::Package::TarReader.new(f) do |tar|
-           yield(tar)
-         end
-       end
-     end
    end
  end

data/lib/datasets/dataset.rb CHANGED
@@ -18,11 +18,17 @@ module Datasets
        Table.new(self)
      end

+     def clear_cache!
+       if cache_dir_path.exist?
+         FileUtils.rmtree(cache_dir_path.to_s, secure: true)
+       end
+     end
+
      private
      def cache_dir_path
        case RUBY_PLATFORM
        when /mswin/, /mingw/
-         base_dir = ENV["LOCALAPPDATA"] || "~/AppData"
+         base_dir = ENV["LOCALAPPDATA"] || "~/AppData/Local"
        when /darwin/
          base_dir = "~/Library/Caches"
        else
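The new `clear_cache!` method is public API; a minimal sketch of how it can be driven (`Datasets::Iris` is used here only because it is a small built-in dataset):

```ruby
require "datasets"

iris = Datasets::Iris.new
iris.first        # forces the CSV to be downloaded into the cache directory
iris.clear_cache! # removes the cached files; the next access downloads them again
```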
data/lib/datasets/downloader.rb CHANGED
@@ -8,6 +8,8 @@ require "pathname"

  module Datasets
    class Downloader
+     class TooManyRedirects < StandardError; end
+
      def initialize(url)
        if url.is_a?(URI::Generic)
          url = url.dup
@@ -31,41 +33,65 @@ module Datasets
        headers["Range"] = "bytes=#{start}-"
      end

-     Net::HTTP.start(@url.hostname,
-                     @url.port,
-                     :use_ssl => (@url.scheme == "https")) do |http|
-       path = @url.path
-       path += "?#{@url.query}" if @url.query
+     start_http(@url, headers) do |response|
+       if response.is_a?(Net::HTTPPartialContent)
+         mode = "ab"
+       else
+         start = nil
+         mode = "wb"
+       end
+
+       base_name = @url.path.split("/").last
+       size_current = 0
+       size_max = response.content_length
+       if start
+         size_current += start
+         size_max += start
+       end
+       progress_reporter = ProgressReporter.new(base_name, size_max)
+       partial_output_path.open(mode) do |output|
+         response.read_body do |chunk|
+           size_current += chunk.bytesize
+           progress_reporter.report(size_current)
+           output.write(chunk)
+         end
+       end
+     end
+     FileUtils.mv(partial_output_path, output_path)
+   rescue TooManyRedirects => error
+     last_url = error.message[/\Atoo many redirections: (.+)\z/, 1]
+     raise TooManyRedirects, "too many redirections: #{@url} .. #{last_url}"
+   end
+
+   private def start_http(url, headers, limit = 10, &block)
+     if limit == 0
+       raise TooManyRedirects, "too many redirections: #{url}"
+     end
+     http = Net::HTTP.new(url.hostname, url.port)
+     # http.set_debug_output($stderr)
+     http.use_ssl = (url.scheme == "https")
+     http.start do
+       path = url.path
+       path += "?#{url.query}" if url.query
        request = Net::HTTP::Get.new(path, headers)
        http.request(request) do |response|
          case response
-         when Net::HTTPPartialContent
-           mode = "ab"
-         when Net::HTTPSuccess
-           start = nil
-           mode = "wb"
+         when Net::HTTPSuccess, Net::HTTPPartialContent
+           return block.call(response)
+         when Net::HTTPRedirection
+           url = URI.parse(response[:location])
+           $stderr.puts "Redirect to #{url}"
+           return start_http(url, headers, limit - 1, &block)
          else
-           break
-         end
-
-         base_name = @url.path.split("/").last
-         size_current = 0
-         size_max = response.content_length
-         if start
-           size_current += start
-           size_max += start
-         end
-         progress_reporter = ProgressReporter.new(base_name, size_max)
-         partial_output_path.open(mode) do |output|
-           response.read_body do |chunk|
-             size_current += chunk.bytesize
-             progress_reporter.report(size_current)
-             output.write(chunk)
+           message = response.code
+           if response.message and not response.message.empty?
+             message += ": #{response.message}"
            end
+           message += ": #{url}"
+           raise response.error_type.new(message, response)
          end
        end
      end
-     FileUtils.mv(partial_output_path, output_path)
    end

    class ProgressReporter
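The restructured downloader now follows redirects itself instead of failing on them: retry with a decremented hop budget until a non-redirect response arrives. A stripped-down sketch of that pattern, independent of the gem's internals (the function name and the bare Net::HTTP usage are illustrative only):

```ruby
require "net/http"
require "uri"

class TooManyRedirects < StandardError; end

# Fetch a URL, following up to `limit` redirects before giving up.
def fetch(url, limit = 10)
  raise TooManyRedirects, "too many redirections: #{url}" if limit.zero?

  response = Net::HTTP.get_response(URI.parse(url))
  case response
  when Net::HTTPSuccess
    response.body
  when Net::HTTPRedirection
    fetch(response["location"], limit - 1) # recurse toward the final target
  else
    raise "#{response.code}: #{response.message}: #{url}"
  end
end
```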
data/lib/datasets/penguins.rb CHANGED
@@ -19,7 +19,6 @@ module Datasets
                         :delta_15_n_permil,
                         :delta_13_c_permil,
                         :comments)
-
    class SpeciesBase < Dataset
      def initialize
        super
@@ -62,17 +61,17 @@ module Datasets
        URL = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.219.3&entityid=002f3893385f710df69eeebe893144ff".freeze
      end

-     # Gentoo penguin data from: https://doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce
-     class Gentoo < SpeciesBase
-       DOI = "doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce".freeze
-       URL = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.220.3&entityid=e03b43c924f226486f2f0ab6709d2381".freeze
-     end
-
      # Chinstrap penguin data from: https://doi.org/10.6073/pasta/409c808f8fc9899d02401bdb04580af7
      class Chinstrap < SpeciesBase
        DOI = "doi.org/10.6073/pasta/409c808f8fc9899d02401bdb04580af7".freeze
        URL = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.221.2&entityid=fe853aa8f7a59aa84cdd3197619ef462".freeze
      end
+
+     # Gentoo penguin data from: https://doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce
+     class Gentoo < SpeciesBase
+       DOI = "doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce".freeze
+       URL = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.220.3&entityid=e03b43c924f226486f2f0ab6709d2381".freeze
+     end
    end

    # This dataset provides the same dataset as https://github.com/allisonhorst/palmerpenguins
@@ -100,8 +99,8 @@

      species_classes = [
        PenguinsRawData::Adelie,
+       PenguinsRawData::Chinstrap,
        PenguinsRawData::Gentoo,
-       PenguinsRawData::Chinstrap
      ]

      species_classes.each do |species_class|
@@ -112,14 +111,36 @@
      end

    private def convert_record(raw_record)
-     Record.new(raw_record.species.split(' ')[0],
-                raw_record.island,
-                raw_record.culmen_length_mm,
-                raw_record.culmen_depth_mm,
-                raw_record.flipper_length_mm&.to_i,
-                raw_record.body_mass_g&.to_i,
-                raw_record.sex&.downcase,
-                raw_record.date_egg&.year)
+     Record.new(*cleanse_fields(raw_record))
+   end
+
+   private def cleanse_fields(raw_record)
+     species = raw_record.species.split(' ')[0]
+     flipper_length_mm = raw_record.flipper_length_mm&.to_i
+     body_mass_g = raw_record.body_mass_g&.to_i
+     sex = normalize_sex(raw_record.sex)
+     year = raw_record.date_egg&.year
+
+     [
+       species,
+       raw_record.island,
+       raw_record.culmen_length_mm,
+       raw_record.culmen_depth_mm,
+       flipper_length_mm,
+       body_mass_g,
+       sex,
+       year
+     ]
+   end
+
+   private def normalize_sex(val)
+     val = val&.downcase
+     case val
+     when "female", "male", nil
+       val
+     else
+       nil
+     end
    end
  end
end
data/lib/datasets/rdatasets.rb ADDED
@@ -0,0 +1,95 @@
+ require_relative "dataset"
+ require_relative "tar_gz_readable"
+
+ module Datasets
+   class RdatasetsList < Dataset
+     Record = Struct.new(:package,
+                         :dataset,
+                         :title,
+                         :rows,
+                         :cols,
+                         :n_binary,
+                         :n_character,
+                         :n_factor,
+                         :n_logical,
+                         :n_numeric,
+                         :csv,
+                         :doc)
+
+     def initialize
+       super
+       @metadata.id = "rdatasets"
+       @metadata.name = "Rdatasets"
+       @metadata.url = "https://vincentarelbundock.github.io/Rdatasets/"
+       @metadata.licenses = ["GPL-3"]
+       @data_url = "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/datasets.csv"
+       @data_path = cache_dir_path + "datasets.csv"
+     end
+
+     def filter(package: nil, dataset: nil)
+       return to_enum(__method__, package: package, dataset: dataset) unless block_given?
+
+       conds = {}
+       conds["Package"] = package if package
+       conds["Item"] = dataset if dataset
+       if conds.empty?
+         each_row {|row| yield Record.new(*row.fields) }
+       else
+         each_row do |row|
+           if conds.all? {|k, v| row[k] == v }
+             yield Record.new(*row.fields)
+           end
+         end
+       end
+     end
+
+     def each(&block)
+       filter(&block)
+     end
+
+     private def each_row(&block)
+       download(@data_path, @data_url) unless @data_path.exist?
+       CSV.open(@data_path, headers: :first_row, converters: :all) do |csv|
+         csv.each(&block)
+       end
+     end
+   end
+
+   class Rdatasets < Dataset
+     def initialize(package_name, dataset_name)
+       list = RdatasetsList.new
+
+       info = list.filter(package: package_name, dataset: dataset_name).first
+       unless info
+         raise ArgumentError, "Unable to locate dataset #{package_name}/#{dataset_name}"
+       end
+
+       super()
+       @metadata.id = "rdatasets-#{package_name}-#{dataset_name}"
+       @metadata.name = "Rdatasets: #{package_name}: #{dataset_name}"
+       @metadata.url = info.csv
+       @metadata.licenses = ["GPL-3"]
+       @metadata.description = info.title
+
+       # Follow the original directory structure in the cache directory
+       @data_path = cache_dir_path + (dataset_name + ".csv")
+
+       @package_name = package_name
+       @dataset_name = dataset_name
+     end
+
+     def each(&block)
+       return to_enum(__method__) unless block_given?
+
+       download(@data_path, @metadata.url) unless @data_path.exist?
+       CSV.open(@data_path, headers: :first_row, converters: :all) do |csv|
+         csv.each do |row|
+           record = row.to_h
+           record.delete("")
+           record.transform_keys!(&:to_sym)
+           yield record
+         end
+       end
+     end
+   end
+ end
data/lib/datasets/tar_gz_readable.rb ADDED
@@ -0,0 +1,14 @@
+ require "rubygems/package"
+ require "zlib"
+
+ module Datasets
+   module TarGzReadable
+     def open_tar_gz(data_path)
+       Zlib::GzipReader.open(data_path) do |f|
+         Gem::Package::TarReader.new(f) do |tar|
+           yield(tar)
+         end
+       end
+     end
+   end
+ end
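As the cifar.rb change above shows, a dataset class mixes this module in and calls `open_tar_gz` to walk a gzipped tar archive. A minimal sketch of the same usage outside the gem (the class name, archive path, and entry name are hypothetical):

```ruby
require "datasets"

class MyArchiveReader
  include Datasets::TarGzReadable

  # Read a single entry out of a .tar.gz file; "data/train.bin" is only an example name.
  def read_entry(archive_path, entry_name = "data/train.bin")
    open_tar_gz(archive_path) do |tar|
      tar.seek(entry_name) do |entry|
        return entry.read
      end
    end
  end
end
```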
data/lib/datasets/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Datasets
-   VERSION = "0.1.1"
+   VERSION = "0.1.2"
  end
test/test-dataset.rb ADDED
@@ -0,0 +1,27 @@
+ class TestDataset < Test::Unit::TestCase
+   sub_test_case("#clear_cache!") do
+     def setup
+       @dataset = Datasets::Iris.new
+       @cache_dir_path = @dataset.send(:cache_dir_path)
+     end
+
+     test("when the dataset is downloaded") do
+       @dataset.first # This ensures the dataset downloaded
+       existence = {before: @cache_dir_path.join("iris.csv").exist?}
+
+       @dataset.clear_cache!
+       existence[:after] = @cache_dir_path.join("iris.csv").exist?
+
+       assert_equal({before: true, after: false},
+                    existence)
+     end
+
+     test("when the dataset is not downloaded") do
+       FileUtils.rmtree(@cache_dir_path.to_s, secure: true) if @cache_dir_path.exist?
+
+       assert_nothing_raised do
+         @dataset.clear_cache!
+       end
+     end
+   end
+ end
test/test-downloader.rb ADDED
@@ -0,0 +1,29 @@
+ class DownloaderTest < Test::Unit::TestCase
+   include Helper::Sandbox
+
+   sub_test_case("#download") do
+     def setup
+       setup_sandbox
+     end
+
+     def teardown
+       teardown_sandbox
+     end
+
+     test("too many redirection") do
+       first_url = "https://example.com/file"
+       last_url = "https://example.com/last_redirection"
+       expected_message = "too many redirections: #{first_url} .. #{last_url}"
+       output_path = @tmp_dir + "file"
+       downloader = Datasets::Downloader.new(first_url)
+
+       downloader.define_singleton_method(:start_http) do |url, headers|
+         raise Datasets::Downloader::TooManyRedirects, "too many redirections: #{last_url}"
+       end
+
+       assert_raise(Datasets::Downloader::TooManyRedirects.new(expected_message)) do
+         downloader.download(output_path)
+       end
+     end
+   end
+ end
test/test-penguins.rb CHANGED
@@ -182,6 +182,18 @@ class PenguinsTest < Test::Unit::TestCase
      @dataset = Datasets::Penguins.new
    end

+   test("order of species") do
+     species_values = @dataset.map {|r| r.species }.uniq
+     assert_equal(["Adelie", "Chinstrap", "Gentoo"],
+                  species_values)
+   end
+
+   test("data cleansing") do
+     sex_values = @dataset.map {|r| r.sex }.uniq.compact.sort
+     assert_equal(["female", "male"],
+                  sex_values)
+   end
+
    test("#each") do
      records = @dataset.each.to_a
      assert_equal([
@@ -196,16 +208,6 @@ class PenguinsTest < Test::Unit::TestCase
            sex: "male",
            year: 2007
          },
-         {
-           species: "Gentoo",
-           island: "Biscoe",
-           bill_length_mm: 46.1,
-           bill_depth_mm: 13.2,
-           flipper_length_mm: 211,
-           body_mass_g: 4500,
-           sex: "female",
-           year: 2007
-         },
          {
            species: "Chinstrap",
            island: "Dream",
@@ -217,13 +219,23 @@ class PenguinsTest < Test::Unit::TestCase
            year: 2007
          },
          {
-           species: "Chinstrap",
-           island: "Dream",
-           bill_length_mm: 50.2,
-           bill_depth_mm: 18.7,
-           flipper_length_mm: 198,
-           body_mass_g: 3775,
+           species: "Gentoo",
+           island: "Biscoe",
+           bill_length_mm: 46.1,
+           bill_depth_mm: 13.2,
+           flipper_length_mm: 211,
+           body_mass_g: 4500,
            sex: "female",
+           year: 2007
+         },
+         {
+           species: "Gentoo",
+           island: "Biscoe",
+           bill_length_mm: 49.9,
+           bill_depth_mm: 16.1,
+           flipper_length_mm: 213,
+           body_mass_g: 5400,
+           sex: "male",
            year: 2009
          }
        ],
@@ -231,7 +243,7 @@ class PenguinsTest < Test::Unit::TestCase
          records.size,
          records[0].to_h,
          records[152].to_h,
-         records[276].to_h,
+         records[220].to_h,
          records[-1].to_h,
        ])
    end
test/test-rdatasets.rb ADDED
@@ -0,0 +1,136 @@
+ class RdatasetsTest < Test::Unit::TestCase
+   sub_test_case("RdatasetsList") do
+     def setup
+       @dataset = Datasets::RdatasetsList.new
+     end
+
+     sub_test_case("#each") do
+       test("with package_name") do
+         records = @dataset.filter(package: "datasets").to_a
+         assert_equal([
+           84,
+           {
+             package: "datasets",
+             dataset: "ability.cov",
+             title: "Ability and Intelligence Tests",
+             rows: 6,
+             cols: 8,
+             n_binary: 0,
+             n_character: 0,
+             n_factor: 0,
+             n_logical: 0,
+             n_numeric: 8,
+             csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/ability.cov.csv",
+             doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/ability.cov.html"
+           },
+           {
+             package: "datasets",
+             dataset: "WWWusage",
+             title: "Internet Usage per Minute",
+             rows: 100,
+             cols: 2,
+             n_binary: 0,
+             n_character: 0,
+             n_factor: 0,
+             n_logical: 0,
+             n_numeric: 2,
+             csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/WWWusage.csv",
+             doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/WWWusage.html"
+           }
+         ],
+         [
+           records.size,
+           records[0].to_h,
+           records[-1].to_h
+         ])
+       end
+
+       test("without package_name") do
+         records = @dataset.each.to_a
+         assert_equal([
+           1478,
+           {
+             package: "AER",
+             dataset: "Affairs",
+             title: "Fair's Extramarital Affairs Data",
+             rows: 601,
+             cols: 9,
+             n_binary: 2,
+             n_character: 0,
+             n_factor: 2,
+             n_logical: 0,
+             n_numeric: 7,
+             csv: "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv",
+             doc: "https://vincentarelbundock.github.io/Rdatasets/doc/AER/Affairs.html"
+           },
+           {
+             package: "vcd",
+             dataset: "WomenQueue",
+             title: "Women in Queues",
+             rows: 11,
+             cols: 2,
+             n_binary: 0,
+             n_character: 0,
+             n_factor: 1,
+             n_logical: 0,
+             n_numeric: 1,
+             csv: "https://vincentarelbundock.github.io/Rdatasets/csv/vcd/WomenQueue.csv",
+             doc: "https://vincentarelbundock.github.io/Rdatasets/doc/vcd/WomenQueue.html"
+           },
+         ],
+         [
+           records.size,
+           records[0].to_h,
+           records[-1].to_h
+         ])
+       end
+     end
+   end
+
+   sub_test_case("Rdatasets") do
+     sub_test_case("datasets") do
+       sub_test_case("AirPassengers") do
+         def setup
+           @dataset = Datasets::Rdatasets.new("datasets", "AirPassengers")
+         end
+
+         test("#each") do
+           records = @dataset.each.to_a
+           assert_equal([
+             144,
+             { time: 1949, value: 112 },
+             { time: 1960.91666666667, value: 432 },
+           ],
+           [
+             records.size,
+             records[0],
+             records[-1]
+           ])
+         end
+
+         test("#metadata.id") do
+           assert_equal("rdatasets-datasets-AirPassengers", @dataset.metadata.id)
+         end
+
+         test("#metadata.description") do
+           description = @dataset.metadata.description
+           assert do
+             description.include?("Monthly Airline Passenger Numbers 1949-1960")
+           end
+         end
+       end
+
+       test("invalid dataset name") do
+         assert_raise(ArgumentError) do
+           Datasets::Rdatasets.new("datasets", "invalid datasets name")
+         end
+       end
+     end
+
+     test("invalid package name") do
+       assert_raise(ArgumentError) do
+         Datasets::Rdatasets.new("invalid package name", "AirPassengers")
+       end
+     end
+   end
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: red-datasets
  version: !ruby/object:Gem::Version
-   version: 0.1.1
+   version: 0.1.2
  platform: ruby
  authors:
  - tomisuker
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-04-11 00:00:00.000000000 Z
+ date: 2021-06-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: csv
@@ -161,7 +161,9 @@ files:
  - lib/datasets/penguins.rb
  - lib/datasets/penn-treebank.rb
  - lib/datasets/postal-code-japan.rb
+ - lib/datasets/rdatasets.rb
  - lib/datasets/table.rb
+ - lib/datasets/tar_gz_readable.rb
  - lib/datasets/version.rb
  - lib/datasets/wikipedia.rb
  - lib/datasets/wine.rb
@@ -172,7 +174,9 @@ files:
  - test/test-cifar.rb
  - test/test-cldr-plurals.rb
  - test/test-communities.rb
+ - test/test-dataset.rb
  - test/test-dictionary.rb
+ - test/test-downloader.rb
  - test/test-e-stat-japan.rb
  - test/test-fashion-mnist.rb
  - test/test-hepatitis.rb
@@ -184,6 +188,7 @@ files:
  - test/test-penguins.rb
  - test/test-penn-treebank.rb
  - test/test-postal-code-japan.rb
+ - test/test-rdatasets.rb
  - test/test-table.rb
  - test/test-wikipedia.rb
  - test/test-wine.rb
@@ -217,7 +222,9 @@ test_files:
  - test/test-cifar.rb
  - test/test-cldr-plurals.rb
  - test/test-communities.rb
+ - test/test-dataset.rb
  - test/test-dictionary.rb
+ - test/test-downloader.rb
  - test/test-e-stat-japan.rb
  - test/test-fashion-mnist.rb
  - test/test-hepatitis.rb
@@ -229,6 +236,7 @@ test_files:
  - test/test-penguins.rb
  - test/test-penn-treebank.rb
  - test/test-postal-code-japan.rb
+ - test/test-rdatasets.rb
  - test/test-table.rb
  - test/test-wikipedia.rb
  - test/test-wine.rb