red-datasets 0.1.4 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -3
- data/Rakefile +56 -1
- data/doc/text/news.md +102 -0
- data/lib/datasets/adult.rb +6 -9
- data/lib/datasets/afinn.rb +48 -0
- data/lib/datasets/aozora-bunko.rb +196 -0
- data/lib/datasets/cache-path.rb +28 -0
- data/lib/datasets/california-housing.rb +60 -0
- data/lib/datasets/cifar.rb +2 -4
- data/lib/datasets/cldr-plurals.rb +2 -4
- data/lib/datasets/communities.rb +5 -8
- data/lib/datasets/dataset.rb +58 -23
- data/lib/datasets/diamonds.rb +26 -0
- data/lib/datasets/downloader.rb +110 -30
- data/lib/datasets/e-stat-japan.rb +2 -1
- data/lib/datasets/fashion-mnist.rb +4 -0
- data/lib/datasets/fuel-economy.rb +35 -0
- data/lib/datasets/geolonia.rb +67 -0
- data/lib/datasets/ggplot2-dataset.rb +79 -0
- data/lib/datasets/hepatitis.rb +5 -8
- data/lib/datasets/iris.rb +5 -8
- data/lib/datasets/ita-corpus.rb +57 -0
- data/lib/datasets/kuzushiji-mnist.rb +16 -0
- data/lib/datasets/lazy.rb +90 -0
- data/lib/datasets/libsvm-dataset-list.rb +5 -8
- data/lib/datasets/libsvm.rb +3 -4
- data/lib/datasets/license.rb +26 -0
- data/lib/datasets/livedoor-news.rb +80 -0
- data/lib/datasets/metadata.rb +14 -0
- data/lib/datasets/mnist.rb +7 -7
- data/lib/datasets/mushroom.rb +5 -8
- data/lib/datasets/nagoya-university-conversation-corpus.rb +109 -0
- data/lib/datasets/penguins.rb +6 -8
- data/lib/datasets/penn-treebank.rb +2 -4
- data/lib/datasets/pmjt-dataset-list.rb +67 -0
- data/lib/datasets/postal-code-japan.rb +2 -6
- data/lib/datasets/quora-duplicate-question-pair.rb +51 -0
- data/lib/datasets/{rdatasets.rb → rdataset.rb} +66 -15
- data/lib/datasets/seaborn.rb +90 -0
- data/lib/datasets/sudachi-synonym-dictionary.rb +5 -11
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets/wikipedia-kyoto-japanese-english.rb +219 -0
- data/lib/datasets/wikipedia.rb +16 -8
- data/lib/datasets/wine.rb +6 -9
- data/lib/datasets/zip-extractor.rb +48 -0
- data/lib/datasets.rb +2 -22
- data/red-datasets.gemspec +1 -1
- data/test/helper.rb +21 -0
- data/test/test-afinn.rb +60 -0
- data/test/test-aozora-bunko.rb +190 -0
- data/test/test-california-housing.rb +56 -0
- data/test/test-cldr-plurals.rb +1 -1
- data/test/test-dataset.rb +15 -7
- data/test/test-diamonds.rb +71 -0
- data/test/test-fuel-economy.rb +75 -0
- data/test/test-geolonia.rb +65 -0
- data/test/test-ita-corpus.rb +69 -0
- data/test/test-kuzushiji-mnist.rb +137 -0
- data/test/test-license.rb +24 -0
- data/test/test-livedoor-news.rb +351 -0
- data/test/test-metadata.rb +36 -0
- data/test/test-nagoya-university-conversation-corpus.rb +132 -0
- data/test/test-penguins.rb +1 -1
- data/test/test-pmjt-dataset-list.rb +50 -0
- data/test/test-quora-duplicate-question-pair.rb +33 -0
- data/test/test-rdataset.rb +246 -0
- data/test/{test-seaborn-data.rb → test-seaborn.rb} +71 -4
- data/test/test-sudachi-synonym-dictionary.rb +5 -5
- data/test/test-wikipedia-kyoto-japanese-english.rb +178 -0
- data/test/test-wikipedia.rb +25 -71
- metadata +62 -14
- data/lib/datasets/seaborn-data.rb +0 -49
- data/test/test-rdatasets.rb +0 -136
data/test/test-wikipedia.rb
CHANGED
@@ -1,100 +1,54 @@
|
|
1
1
|
class WikipediaTest < Test::Unit::TestCase
|
2
|
-
sub_test_case("
|
2
|
+
sub_test_case("en") do
|
3
3
|
sub_test_case("articles") do
|
4
|
-
include Helper::Sandbox
|
5
|
-
|
6
4
|
def setup
|
7
|
-
|
8
|
-
@dataset = Datasets::Wikipedia.new(language: :ja,
|
5
|
+
@dataset = Datasets::Wikipedia.new(language: :en,
|
9
6
|
type: :articles)
|
10
|
-
def @dataset.cache_dir_path
|
11
|
-
@cache_dir_path
|
12
|
-
end
|
13
|
-
def @dataset.cache_dir_path=(path)
|
14
|
-
@cache_dir_path = path
|
15
|
-
end
|
16
|
-
@dataset.cache_dir_path = @tmp_dir
|
17
|
-
end
|
18
|
-
|
19
|
-
def teardown
|
20
|
-
teardown_sandbox
|
21
7
|
end
|
22
8
|
|
23
9
|
test("#each") do
|
24
|
-
|
25
|
-
xml_path = output_path.sub_ext("")
|
26
|
-
xml_path.open("w") do |xml_file|
|
27
|
-
xml_file.puts(<<-XML)
|
28
|
-
<mediawiki
|
29
|
-
xmlns="http://www.mediawiki.org/xml/export-0.10/"
|
30
|
-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
31
|
-
xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.10/ http://www.mediawiki.org/xml/export-0.10.xsd"
|
32
|
-
version="0.10" xml:lang="ja">
|
33
|
-
<siteinfo>
|
34
|
-
<sitename>Wikipedia</sitename>
|
35
|
-
</siteinfo>
|
36
|
-
<page>
|
37
|
-
<title>タイトル</title>
|
38
|
-
<ns>4</ns>
|
39
|
-
<id>1</id>
|
40
|
-
<restrictions>sysop</restrictions>
|
41
|
-
<revision>
|
42
|
-
<id>3</id>
|
43
|
-
<parentid>2</parentid>
|
44
|
-
<timestamp>2004-04-30T14:46:00Z</timestamp>
|
45
|
-
<contributor>
|
46
|
-
<username>user</username>
|
47
|
-
<id>10</id>
|
48
|
-
</contributor>
|
49
|
-
<minor />
|
50
|
-
<comment>コメント</comment>
|
51
|
-
<model>wikitext</model>
|
52
|
-
<format>text/x-wiki</format>
|
53
|
-
<text xml:space="preserve">テキスト</text>
|
54
|
-
<sha1>a9674b19f8c56f785c91a555d0a144522bb318e6</sha1>
|
55
|
-
</revision>
|
56
|
-
</page>
|
57
|
-
</mediawiki>
|
58
|
-
XML
|
59
|
-
end
|
60
|
-
unless system("bzip2", xml_path.to_s)
|
61
|
-
raise "failed to run bzip2"
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
contributor = Datasets::Wikipedia::Contributor.new("user", 10)
|
10
|
+
contributor = Datasets::Wikipedia::Contributor.new("Elli", 20842734)
|
66
11
|
revision = Datasets::Wikipedia::Revision.new
|
67
|
-
revision.id =
|
68
|
-
revision.parent_id =
|
69
|
-
revision.timestamp = Time.iso8601("
|
12
|
+
revision.id = 1002250816
|
13
|
+
revision.parent_id = 854851586
|
14
|
+
revision.timestamp = Time.iso8601("2021-01-23T15:15:01Z")
|
70
15
|
revision.contributor = contributor
|
71
|
-
revision.comment = "
|
16
|
+
revision.comment = "shel"
|
72
17
|
revision.model = "wikitext"
|
73
18
|
revision.format = "text/x-wiki"
|
74
|
-
revision.text =
|
75
|
-
|
19
|
+
revision.text = <<-TEXT.chomp
|
20
|
+
#REDIRECT [[Computer accessibility]]
|
21
|
+
|
22
|
+
{{rcat shell|
|
23
|
+
{{R from move}}
|
24
|
+
{{R from CamelCase}}
|
25
|
+
{{R unprintworthy}}
|
26
|
+
}}
|
27
|
+
TEXT
|
28
|
+
revision.sha1 = "kmysdltgexdwkv2xsml3j44jb56dxvn"
|
76
29
|
page = Datasets::Wikipedia::Page.new
|
77
|
-
page.title = "
|
78
|
-
page.namespace =
|
79
|
-
page.id =
|
80
|
-
page.restrictions =
|
30
|
+
page.title = "AccessibleComputing"
|
31
|
+
page.namespace = 0
|
32
|
+
page.id = 10
|
33
|
+
page.restrictions = nil
|
34
|
+
page.redirect = "Computer accessibility"
|
81
35
|
page.revision = revision
|
82
36
|
assert_equal(page, @dataset.each.first)
|
83
37
|
end
|
84
38
|
|
85
39
|
sub_test_case("#metadata") do
|
86
40
|
test("#id") do
|
87
|
-
assert_equal("wikipedia-
|
41
|
+
assert_equal("wikipedia-en-articles",
|
88
42
|
@dataset.metadata.id)
|
89
43
|
end
|
90
44
|
|
91
45
|
test("#name") do
|
92
|
-
assert_equal("Wikipedia articles (
|
46
|
+
assert_equal("Wikipedia articles (en)",
|
93
47
|
@dataset.metadata.name)
|
94
48
|
end
|
95
49
|
|
96
50
|
test("#description") do
|
97
|
-
assert_equal("Wikipedia articles in
|
51
|
+
assert_equal("Wikipedia articles in en",
|
98
52
|
@dataset.metadata.description)
|
99
53
|
end
|
100
54
|
end
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-datasets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomisuker
|
8
8
|
- Kouhei Sutou
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-05-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: csv
|
@@ -17,14 +17,14 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 3.
|
20
|
+
version: 3.2.4
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: 3.
|
27
|
+
version: 3.2.4
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: rexml
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,65 +142,98 @@ files:
|
|
142
142
|
- doc/text/news.md
|
143
143
|
- lib/datasets.rb
|
144
144
|
- lib/datasets/adult.rb
|
145
|
+
- lib/datasets/afinn.rb
|
146
|
+
- lib/datasets/aozora-bunko.rb
|
147
|
+
- lib/datasets/cache-path.rb
|
148
|
+
- lib/datasets/california-housing.rb
|
145
149
|
- lib/datasets/cifar.rb
|
146
150
|
- lib/datasets/cldr-plurals.rb
|
147
151
|
- lib/datasets/communities.rb
|
148
152
|
- lib/datasets/dataset.rb
|
153
|
+
- lib/datasets/diamonds.rb
|
149
154
|
- lib/datasets/dictionary.rb
|
150
155
|
- lib/datasets/downloader.rb
|
151
156
|
- lib/datasets/e-stat-japan.rb
|
152
157
|
- lib/datasets/error.rb
|
153
158
|
- lib/datasets/fashion-mnist.rb
|
159
|
+
- lib/datasets/fuel-economy.rb
|
160
|
+
- lib/datasets/geolonia.rb
|
161
|
+
- lib/datasets/ggplot2-dataset.rb
|
154
162
|
- lib/datasets/hepatitis.rb
|
155
163
|
- lib/datasets/iris.rb
|
164
|
+
- lib/datasets/ita-corpus.rb
|
165
|
+
- lib/datasets/kuzushiji-mnist.rb
|
166
|
+
- lib/datasets/lazy.rb
|
156
167
|
- lib/datasets/libsvm-dataset-list.rb
|
157
168
|
- lib/datasets/libsvm.rb
|
169
|
+
- lib/datasets/license.rb
|
170
|
+
- lib/datasets/livedoor-news.rb
|
158
171
|
- lib/datasets/metadata.rb
|
159
172
|
- lib/datasets/mnist.rb
|
160
173
|
- lib/datasets/mushroom.rb
|
174
|
+
- lib/datasets/nagoya-university-conversation-corpus.rb
|
161
175
|
- lib/datasets/penguins.rb
|
162
176
|
- lib/datasets/penn-treebank.rb
|
177
|
+
- lib/datasets/pmjt-dataset-list.rb
|
163
178
|
- lib/datasets/postal-code-japan.rb
|
164
|
-
- lib/datasets/
|
165
|
-
- lib/datasets/
|
179
|
+
- lib/datasets/quora-duplicate-question-pair.rb
|
180
|
+
- lib/datasets/rdataset.rb
|
181
|
+
- lib/datasets/seaborn.rb
|
166
182
|
- lib/datasets/sudachi-synonym-dictionary.rb
|
167
183
|
- lib/datasets/table.rb
|
168
184
|
- lib/datasets/tar-gz-readable.rb
|
169
185
|
- lib/datasets/version.rb
|
186
|
+
- lib/datasets/wikipedia-kyoto-japanese-english.rb
|
170
187
|
- lib/datasets/wikipedia.rb
|
171
188
|
- lib/datasets/wine.rb
|
189
|
+
- lib/datasets/zip-extractor.rb
|
172
190
|
- red-datasets.gemspec
|
173
191
|
- test/helper.rb
|
174
192
|
- test/run-test.rb
|
175
193
|
- test/test-adult.rb
|
194
|
+
- test/test-afinn.rb
|
195
|
+
- test/test-aozora-bunko.rb
|
196
|
+
- test/test-california-housing.rb
|
176
197
|
- test/test-cifar.rb
|
177
198
|
- test/test-cldr-plurals.rb
|
178
199
|
- test/test-communities.rb
|
179
200
|
- test/test-dataset.rb
|
201
|
+
- test/test-diamonds.rb
|
180
202
|
- test/test-dictionary.rb
|
181
203
|
- test/test-downloader.rb
|
182
204
|
- test/test-e-stat-japan.rb
|
183
205
|
- test/test-fashion-mnist.rb
|
206
|
+
- test/test-fuel-economy.rb
|
207
|
+
- test/test-geolonia.rb
|
184
208
|
- test/test-hepatitis.rb
|
185
209
|
- test/test-iris.rb
|
210
|
+
- test/test-ita-corpus.rb
|
211
|
+
- test/test-kuzushiji-mnist.rb
|
186
212
|
- test/test-libsvm-dataset-list.rb
|
187
213
|
- test/test-libsvm.rb
|
214
|
+
- test/test-license.rb
|
215
|
+
- test/test-livedoor-news.rb
|
216
|
+
- test/test-metadata.rb
|
188
217
|
- test/test-mnist.rb
|
189
218
|
- test/test-mushroom.rb
|
219
|
+
- test/test-nagoya-university-conversation-corpus.rb
|
190
220
|
- test/test-penguins.rb
|
191
221
|
- test/test-penn-treebank.rb
|
222
|
+
- test/test-pmjt-dataset-list.rb
|
192
223
|
- test/test-postal-code-japan.rb
|
193
|
-
- test/test-
|
194
|
-
- test/test-
|
224
|
+
- test/test-quora-duplicate-question-pair.rb
|
225
|
+
- test/test-rdataset.rb
|
226
|
+
- test/test-seaborn.rb
|
195
227
|
- test/test-sudachi-synonym-dictionary.rb
|
196
228
|
- test/test-table.rb
|
229
|
+
- test/test-wikipedia-kyoto-japanese-english.rb
|
197
230
|
- test/test-wikipedia.rb
|
198
231
|
- test/test-wine.rb
|
199
232
|
homepage: https://github.com/red-data-tools/red-datasets
|
200
233
|
licenses:
|
201
234
|
- MIT
|
202
235
|
metadata: {}
|
203
|
-
post_install_message:
|
236
|
+
post_install_message:
|
204
237
|
rdoc_options: []
|
205
238
|
require_paths:
|
206
239
|
- lib
|
@@ -215,34 +248,49 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
215
248
|
- !ruby/object:Gem::Version
|
216
249
|
version: '0'
|
217
250
|
requirements: []
|
218
|
-
rubygems_version: 3.
|
219
|
-
signing_key:
|
251
|
+
rubygems_version: 3.5.0.dev
|
252
|
+
signing_key:
|
220
253
|
specification_version: 4
|
221
254
|
summary: Red Datasets provides classes that provide common datasets such as iris dataset.
|
222
255
|
test_files:
|
223
256
|
- test/helper.rb
|
224
257
|
- test/run-test.rb
|
225
258
|
- test/test-adult.rb
|
259
|
+
- test/test-afinn.rb
|
260
|
+
- test/test-aozora-bunko.rb
|
261
|
+
- test/test-california-housing.rb
|
226
262
|
- test/test-cifar.rb
|
227
263
|
- test/test-cldr-plurals.rb
|
228
264
|
- test/test-communities.rb
|
229
265
|
- test/test-dataset.rb
|
266
|
+
- test/test-diamonds.rb
|
230
267
|
- test/test-dictionary.rb
|
231
268
|
- test/test-downloader.rb
|
232
269
|
- test/test-e-stat-japan.rb
|
233
270
|
- test/test-fashion-mnist.rb
|
271
|
+
- test/test-fuel-economy.rb
|
272
|
+
- test/test-geolonia.rb
|
234
273
|
- test/test-hepatitis.rb
|
235
274
|
- test/test-iris.rb
|
275
|
+
- test/test-ita-corpus.rb
|
276
|
+
- test/test-kuzushiji-mnist.rb
|
236
277
|
- test/test-libsvm-dataset-list.rb
|
237
278
|
- test/test-libsvm.rb
|
279
|
+
- test/test-license.rb
|
280
|
+
- test/test-livedoor-news.rb
|
281
|
+
- test/test-metadata.rb
|
238
282
|
- test/test-mnist.rb
|
239
283
|
- test/test-mushroom.rb
|
284
|
+
- test/test-nagoya-university-conversation-corpus.rb
|
240
285
|
- test/test-penguins.rb
|
241
286
|
- test/test-penn-treebank.rb
|
287
|
+
- test/test-pmjt-dataset-list.rb
|
242
288
|
- test/test-postal-code-japan.rb
|
243
|
-
- test/test-
|
244
|
-
- test/test-
|
289
|
+
- test/test-quora-duplicate-question-pair.rb
|
290
|
+
- test/test-rdataset.rb
|
291
|
+
- test/test-seaborn.rb
|
245
292
|
- test/test-sudachi-synonym-dictionary.rb
|
246
293
|
- test/test-table.rb
|
294
|
+
- test/test-wikipedia-kyoto-japanese-english.rb
|
247
295
|
- test/test-wikipedia.rb
|
248
296
|
- test/test-wine.rb
|
@@ -1,49 +0,0 @@
|
|
1
|
-
module Datasets
|
2
|
-
class SeabornData < Dataset
|
3
|
-
URL_FORMAT = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/%{name}.csv".freeze
|
4
|
-
|
5
|
-
def initialize(name)
|
6
|
-
super()
|
7
|
-
@metadata.id = "seaborn-data-#{name}"
|
8
|
-
@metadata.name = "SeabornData: #{name}"
|
9
|
-
@metadata.url = URL_FORMAT % {name: name}
|
10
|
-
|
11
|
-
@data_path = cache_dir_path + (name + ".csv")
|
12
|
-
@name = name
|
13
|
-
end
|
14
|
-
|
15
|
-
def each(&block)
|
16
|
-
return to_enum(__method__) unless block_given?
|
17
|
-
|
18
|
-
download(@data_path, @metadata.url) unless @data_path.exist?
|
19
|
-
CSV.open(@data_path, headers: :first_row, converters: :all) do |csv|
|
20
|
-
csv.each do |row|
|
21
|
-
record = prepare_record(row)
|
22
|
-
yield record
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
private
|
28
|
-
def prepare_record(csv_row)
|
29
|
-
record = csv_row.to_h
|
30
|
-
record.transform_keys!(&:to_sym)
|
31
|
-
|
32
|
-
# Perform the same preprocessing as seaborn's load_dataset function
|
33
|
-
preprocessor = :"preprocess_#{@name}_record"
|
34
|
-
__send__(preprocessor, record) if respond_to?(preprocessor, true)
|
35
|
-
|
36
|
-
record
|
37
|
-
end
|
38
|
-
|
39
|
-
# The same preprocessing as seaborn.load_dataset
|
40
|
-
def preprocess_flights_record(record)
|
41
|
-
record[:month] &&= record[:month][0,3]
|
42
|
-
end
|
43
|
-
|
44
|
-
# The same preprocessing as seaborn.load_dataset
|
45
|
-
def preprocess_penguins_record(record)
|
46
|
-
record[:sex] &&= record[:sex].capitalize
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
data/test/test-rdatasets.rb
DELETED
@@ -1,136 +0,0 @@
|
|
1
|
-
class RdatasetsTest < Test::Unit::TestCase
|
2
|
-
sub_test_case("RdatasetsList") do
|
3
|
-
def setup
|
4
|
-
@dataset = Datasets::RdatasetsList.new
|
5
|
-
end
|
6
|
-
|
7
|
-
sub_test_case("#each") do
|
8
|
-
test("with package_name") do
|
9
|
-
records = @dataset.filter(package: "datasets").to_a
|
10
|
-
assert_equal([
|
11
|
-
84,
|
12
|
-
{
|
13
|
-
package: "datasets",
|
14
|
-
dataset: "ability.cov",
|
15
|
-
title: "Ability and Intelligence Tests",
|
16
|
-
rows: 6,
|
17
|
-
cols: 8,
|
18
|
-
n_binary: 0,
|
19
|
-
n_character: 0,
|
20
|
-
n_factor: 0,
|
21
|
-
n_logical: 0,
|
22
|
-
n_numeric: 8,
|
23
|
-
csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/ability.cov.csv",
|
24
|
-
doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/ability.cov.html"
|
25
|
-
},
|
26
|
-
{
|
27
|
-
package: "datasets",
|
28
|
-
dataset: "WWWusage",
|
29
|
-
title: "Internet Usage per Minute",
|
30
|
-
rows: 100,
|
31
|
-
cols: 2,
|
32
|
-
n_binary: 0,
|
33
|
-
n_character: 0,
|
34
|
-
n_factor: 0,
|
35
|
-
n_logical: 0,
|
36
|
-
n_numeric: 2,
|
37
|
-
csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/WWWusage.csv",
|
38
|
-
doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/WWWusage.html"
|
39
|
-
}
|
40
|
-
],
|
41
|
-
[
|
42
|
-
records.size,
|
43
|
-
records[0].to_h,
|
44
|
-
records[-1].to_h
|
45
|
-
])
|
46
|
-
end
|
47
|
-
|
48
|
-
test("without package_name") do
|
49
|
-
records = @dataset.each.to_a
|
50
|
-
assert_equal([
|
51
|
-
1714,
|
52
|
-
{
|
53
|
-
package: "AER",
|
54
|
-
dataset: "Affairs",
|
55
|
-
title: "Fair's Extramarital Affairs Data",
|
56
|
-
rows: 601,
|
57
|
-
cols: 9,
|
58
|
-
n_binary: 2,
|
59
|
-
n_character: 0,
|
60
|
-
n_factor: 2,
|
61
|
-
n_logical: 0,
|
62
|
-
n_numeric: 7,
|
63
|
-
csv: "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv",
|
64
|
-
doc: "https://vincentarelbundock.github.io/Rdatasets/doc/AER/Affairs.html"
|
65
|
-
},
|
66
|
-
{
|
67
|
-
package: "vcd",
|
68
|
-
dataset: "WomenQueue",
|
69
|
-
title: "Women in Queues",
|
70
|
-
rows: 11,
|
71
|
-
cols: 2,
|
72
|
-
n_binary: 0,
|
73
|
-
n_character: 0,
|
74
|
-
n_factor: 1,
|
75
|
-
n_logical: 0,
|
76
|
-
n_numeric: 1,
|
77
|
-
csv: "https://vincentarelbundock.github.io/Rdatasets/csv/vcd/WomenQueue.csv",
|
78
|
-
doc: "https://vincentarelbundock.github.io/Rdatasets/doc/vcd/WomenQueue.html"
|
79
|
-
},
|
80
|
-
],
|
81
|
-
[
|
82
|
-
records.size,
|
83
|
-
records[0].to_h,
|
84
|
-
records[-1].to_h
|
85
|
-
])
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
sub_test_case("Rdatasets") do
|
91
|
-
sub_test_case("datasets") do
|
92
|
-
sub_test_case("AirPassengers") do
|
93
|
-
def setup
|
94
|
-
@dataset = Datasets::Rdatasets.new("datasets", "AirPassengers")
|
95
|
-
end
|
96
|
-
|
97
|
-
test("#each") do
|
98
|
-
records = @dataset.each.to_a
|
99
|
-
assert_equal([
|
100
|
-
144,
|
101
|
-
{ time: 1949, value: 112 },
|
102
|
-
{ time: 1960.91666666667, value: 432 },
|
103
|
-
],
|
104
|
-
[
|
105
|
-
records.size,
|
106
|
-
records[0],
|
107
|
-
records[-1]
|
108
|
-
])
|
109
|
-
end
|
110
|
-
|
111
|
-
test("#metadata.id") do
|
112
|
-
assert_equal("rdatasets-datasets-AirPassengers", @dataset.metadata.id)
|
113
|
-
end
|
114
|
-
|
115
|
-
test("#metadata.description") do
|
116
|
-
description = @dataset.metadata.description
|
117
|
-
assert do
|
118
|
-
description.include?("Monthly Airline Passenger Numbers 1949-1960")
|
119
|
-
end
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
test("invalid dataset name") do
|
124
|
-
assert_raise(ArgumentError) do
|
125
|
-
Datasets::Rdatasets.new("datasets", "invalid datasets name")
|
126
|
-
end
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
test("invalid package name") do
|
131
|
-
assert_raise(ArgumentError) do
|
132
|
-
Datasets::Rdatasets.new("invalid package name", "AirPassengers")
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|
136
|
-
end
|