red-datasets 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -3
- data/Rakefile +56 -1
- data/doc/text/news.md +102 -0
- data/lib/datasets/adult.rb +6 -9
- data/lib/datasets/afinn.rb +48 -0
- data/lib/datasets/aozora-bunko.rb +196 -0
- data/lib/datasets/cache-path.rb +28 -0
- data/lib/datasets/california-housing.rb +60 -0
- data/lib/datasets/cifar.rb +2 -4
- data/lib/datasets/cldr-plurals.rb +2 -4
- data/lib/datasets/communities.rb +5 -8
- data/lib/datasets/dataset.rb +58 -23
- data/lib/datasets/diamonds.rb +26 -0
- data/lib/datasets/downloader.rb +110 -30
- data/lib/datasets/e-stat-japan.rb +2 -1
- data/lib/datasets/fashion-mnist.rb +4 -0
- data/lib/datasets/fuel-economy.rb +35 -0
- data/lib/datasets/geolonia.rb +67 -0
- data/lib/datasets/ggplot2-dataset.rb +79 -0
- data/lib/datasets/hepatitis.rb +5 -8
- data/lib/datasets/iris.rb +5 -8
- data/lib/datasets/ita-corpus.rb +57 -0
- data/lib/datasets/kuzushiji-mnist.rb +16 -0
- data/lib/datasets/lazy.rb +90 -0
- data/lib/datasets/libsvm-dataset-list.rb +5 -8
- data/lib/datasets/libsvm.rb +3 -4
- data/lib/datasets/license.rb +26 -0
- data/lib/datasets/livedoor-news.rb +80 -0
- data/lib/datasets/metadata.rb +14 -0
- data/lib/datasets/mnist.rb +7 -7
- data/lib/datasets/mushroom.rb +5 -8
- data/lib/datasets/nagoya-university-conversation-corpus.rb +109 -0
- data/lib/datasets/penguins.rb +6 -8
- data/lib/datasets/penn-treebank.rb +2 -4
- data/lib/datasets/pmjt-dataset-list.rb +67 -0
- data/lib/datasets/postal-code-japan.rb +2 -6
- data/lib/datasets/quora-duplicate-question-pair.rb +51 -0
- data/lib/datasets/{rdatasets.rb → rdataset.rb} +66 -15
- data/lib/datasets/seaborn.rb +90 -0
- data/lib/datasets/sudachi-synonym-dictionary.rb +5 -11
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets/wikipedia-kyoto-japanese-english.rb +219 -0
- data/lib/datasets/wikipedia.rb +16 -8
- data/lib/datasets/wine.rb +6 -9
- data/lib/datasets/zip-extractor.rb +48 -0
- data/lib/datasets.rb +2 -22
- data/red-datasets.gemspec +1 -1
- data/test/helper.rb +21 -0
- data/test/test-afinn.rb +60 -0
- data/test/test-aozora-bunko.rb +190 -0
- data/test/test-california-housing.rb +56 -0
- data/test/test-cldr-plurals.rb +1 -1
- data/test/test-dataset.rb +15 -7
- data/test/test-diamonds.rb +71 -0
- data/test/test-fuel-economy.rb +75 -0
- data/test/test-geolonia.rb +65 -0
- data/test/test-ita-corpus.rb +69 -0
- data/test/test-kuzushiji-mnist.rb +137 -0
- data/test/test-license.rb +24 -0
- data/test/test-livedoor-news.rb +351 -0
- data/test/test-metadata.rb +36 -0
- data/test/test-nagoya-university-conversation-corpus.rb +132 -0
- data/test/test-penguins.rb +1 -1
- data/test/test-pmjt-dataset-list.rb +50 -0
- data/test/test-quora-duplicate-question-pair.rb +33 -0
- data/test/test-rdataset.rb +246 -0
- data/test/{test-seaborn-data.rb → test-seaborn.rb} +71 -4
- data/test/test-sudachi-synonym-dictionary.rb +5 -5
- data/test/test-wikipedia-kyoto-japanese-english.rb +178 -0
- data/test/test-wikipedia.rb +25 -71
- metadata +62 -14
- data/lib/datasets/seaborn-data.rb +0 -49
- data/test/test-rdatasets.rb +0 -136
data/test/test-wikipedia.rb
CHANGED
@@ -1,100 +1,54 @@
|
|
1
1
|
class WikipediaTest < Test::Unit::TestCase
|
2
|
-
sub_test_case("ja") do
|
2
|
+
sub_test_case("en") do
|
3
3
|
sub_test_case("articles") do
|
4
|
-
include Helper::Sandbox
|
5
|
-
|
6
4
|
def setup
|
7
|
-
|
8
|
-
@dataset = Datasets::Wikipedia.new(language: :ja,
|
5
|
+
@dataset = Datasets::Wikipedia.new(language: :en,
|
9
6
|
type: :articles)
|
10
|
-
def @dataset.cache_dir_path
|
11
|
-
@cache_dir_path
|
12
|
-
end
|
13
|
-
def @dataset.cache_dir_path=(path)
|
14
|
-
@cache_dir_path = path
|
15
|
-
end
|
16
|
-
@dataset.cache_dir_path = @tmp_dir
|
17
|
-
end
|
18
|
-
|
19
|
-
def teardown
|
20
|
-
teardown_sandbox
|
21
7
|
end
|
22
8
|
|
23
9
|
test("#each") do
|
24
|
-
|
25
|
-
xml_path = output_path.sub_ext("")
|
26
|
-
xml_path.open("w") do |xml_file|
|
27
|
-
xml_file.puts(<<-XML)
|
28
|
-
<mediawiki
|
29
|
-
xmlns="http://www.mediawiki.org/xml/export-0.10/"
|
30
|
-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
31
|
-
xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.10/ http://www.mediawiki.org/xml/export-0.10.xsd"
|
32
|
-
version="0.10" xml:lang="ja">
|
33
|
-
<siteinfo>
|
34
|
-
<sitename>Wikipedia</sitename>
|
35
|
-
</siteinfo>
|
36
|
-
<page>
|
37
|
-
<title>タイトル</title>
|
38
|
-
<ns>4</ns>
|
39
|
-
<id>1</id>
|
40
|
-
<restrictions>sysop</restrictions>
|
41
|
-
<revision>
|
42
|
-
<id>3</id>
|
43
|
-
<parentid>2</parentid>
|
44
|
-
<timestamp>2004-04-30T14:46:00Z</timestamp>
|
45
|
-
<contributor>
|
46
|
-
<username>user</username>
|
47
|
-
<id>10</id>
|
48
|
-
</contributor>
|
49
|
-
<minor />
|
50
|
-
<comment>コメント</comment>
|
51
|
-
<model>wikitext</model>
|
52
|
-
<format>text/x-wiki</format>
|
53
|
-
<text xml:space="preserve">テキスト</text>
|
54
|
-
<sha1>a9674b19f8c56f785c91a555d0a144522bb318e6</sha1>
|
55
|
-
</revision>
|
56
|
-
</page>
|
57
|
-
</mediawiki>
|
58
|
-
XML
|
59
|
-
end
|
60
|
-
unless system("bzip2", xml_path.to_s)
|
61
|
-
raise "failed to run bzip2"
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
contributor = Datasets::Wikipedia::Contributor.new("user", 10)
|
10
|
+
contributor = Datasets::Wikipedia::Contributor.new("Elli", 20842734)
|
66
11
|
revision = Datasets::Wikipedia::Revision.new
|
67
|
-
revision.id = 3
|
68
|
-
revision.parent_id = 2
|
69
|
-
revision.timestamp = Time.iso8601("2004-04-30T14:46:00Z")
|
12
|
+
revision.id = 1002250816
|
13
|
+
revision.parent_id = 854851586
|
14
|
+
revision.timestamp = Time.iso8601("2021-01-23T15:15:01Z")
|
70
15
|
revision.contributor = contributor
|
71
|
-
revision.comment = "コメント"
|
16
|
+
revision.comment = "shel"
|
72
17
|
revision.model = "wikitext"
|
73
18
|
revision.format = "text/x-wiki"
|
74
|
-
revision.text =
|
75
|
-
|
19
|
+
revision.text = <<-TEXT.chomp
|
20
|
+
#REDIRECT [[Computer accessibility]]
|
21
|
+
|
22
|
+
{{rcat shell|
|
23
|
+
{{R from move}}
|
24
|
+
{{R from CamelCase}}
|
25
|
+
{{R unprintworthy}}
|
26
|
+
}}
|
27
|
+
TEXT
|
28
|
+
revision.sha1 = "kmysdltgexdwkv2xsml3j44jb56dxvn"
|
76
29
|
page = Datasets::Wikipedia::Page.new
|
77
|
-
page.title = "タイトル"
|
78
|
-
page.namespace = 4
|
79
|
-
page.id = 1
|
80
|
-
page.restrictions =
|
30
|
+
page.title = "AccessibleComputing"
|
31
|
+
page.namespace = 0
|
32
|
+
page.id = 10
|
33
|
+
page.restrictions = nil
|
34
|
+
page.redirect = "Computer accessibility"
|
81
35
|
page.revision = revision
|
82
36
|
assert_equal(page, @dataset.each.first)
|
83
37
|
end
|
84
38
|
|
85
39
|
sub_test_case("#metadata") do
|
86
40
|
test("#id") do
|
87
|
-
assert_equal("wikipedia-ja-articles",
|
41
|
+
assert_equal("wikipedia-en-articles",
|
88
42
|
@dataset.metadata.id)
|
89
43
|
end
|
90
44
|
|
91
45
|
test("#name") do
|
92
|
-
assert_equal("Wikipedia articles (ja)",
|
46
|
+
assert_equal("Wikipedia articles (en)",
|
93
47
|
@dataset.metadata.name)
|
94
48
|
end
|
95
49
|
|
96
50
|
test("#description") do
|
97
|
-
assert_equal("Wikipedia articles in ja",
|
51
|
+
assert_equal("Wikipedia articles in en",
|
98
52
|
@dataset.metadata.description)
|
99
53
|
end
|
100
54
|
end
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-datasets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomisuker
|
8
8
|
- Kouhei Sutou
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-05-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: csv
|
@@ -17,14 +17,14 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 3.
|
20
|
+
version: 3.2.4
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: 3.
|
27
|
+
version: 3.2.4
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: rexml
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,65 +142,98 @@ files:
|
|
142
142
|
- doc/text/news.md
|
143
143
|
- lib/datasets.rb
|
144
144
|
- lib/datasets/adult.rb
|
145
|
+
- lib/datasets/afinn.rb
|
146
|
+
- lib/datasets/aozora-bunko.rb
|
147
|
+
- lib/datasets/cache-path.rb
|
148
|
+
- lib/datasets/california-housing.rb
|
145
149
|
- lib/datasets/cifar.rb
|
146
150
|
- lib/datasets/cldr-plurals.rb
|
147
151
|
- lib/datasets/communities.rb
|
148
152
|
- lib/datasets/dataset.rb
|
153
|
+
- lib/datasets/diamonds.rb
|
149
154
|
- lib/datasets/dictionary.rb
|
150
155
|
- lib/datasets/downloader.rb
|
151
156
|
- lib/datasets/e-stat-japan.rb
|
152
157
|
- lib/datasets/error.rb
|
153
158
|
- lib/datasets/fashion-mnist.rb
|
159
|
+
- lib/datasets/fuel-economy.rb
|
160
|
+
- lib/datasets/geolonia.rb
|
161
|
+
- lib/datasets/ggplot2-dataset.rb
|
154
162
|
- lib/datasets/hepatitis.rb
|
155
163
|
- lib/datasets/iris.rb
|
164
|
+
- lib/datasets/ita-corpus.rb
|
165
|
+
- lib/datasets/kuzushiji-mnist.rb
|
166
|
+
- lib/datasets/lazy.rb
|
156
167
|
- lib/datasets/libsvm-dataset-list.rb
|
157
168
|
- lib/datasets/libsvm.rb
|
169
|
+
- lib/datasets/license.rb
|
170
|
+
- lib/datasets/livedoor-news.rb
|
158
171
|
- lib/datasets/metadata.rb
|
159
172
|
- lib/datasets/mnist.rb
|
160
173
|
- lib/datasets/mushroom.rb
|
174
|
+
- lib/datasets/nagoya-university-conversation-corpus.rb
|
161
175
|
- lib/datasets/penguins.rb
|
162
176
|
- lib/datasets/penn-treebank.rb
|
177
|
+
- lib/datasets/pmjt-dataset-list.rb
|
163
178
|
- lib/datasets/postal-code-japan.rb
|
164
|
-
- lib/datasets/rdatasets.rb
|
165
|
-
- lib/datasets/seaborn-data.rb
|
179
|
+
- lib/datasets/quora-duplicate-question-pair.rb
|
180
|
+
- lib/datasets/rdataset.rb
|
181
|
+
- lib/datasets/seaborn.rb
|
166
182
|
- lib/datasets/sudachi-synonym-dictionary.rb
|
167
183
|
- lib/datasets/table.rb
|
168
184
|
- lib/datasets/tar-gz-readable.rb
|
169
185
|
- lib/datasets/version.rb
|
186
|
+
- lib/datasets/wikipedia-kyoto-japanese-english.rb
|
170
187
|
- lib/datasets/wikipedia.rb
|
171
188
|
- lib/datasets/wine.rb
|
189
|
+
- lib/datasets/zip-extractor.rb
|
172
190
|
- red-datasets.gemspec
|
173
191
|
- test/helper.rb
|
174
192
|
- test/run-test.rb
|
175
193
|
- test/test-adult.rb
|
194
|
+
- test/test-afinn.rb
|
195
|
+
- test/test-aozora-bunko.rb
|
196
|
+
- test/test-california-housing.rb
|
176
197
|
- test/test-cifar.rb
|
177
198
|
- test/test-cldr-plurals.rb
|
178
199
|
- test/test-communities.rb
|
179
200
|
- test/test-dataset.rb
|
201
|
+
- test/test-diamonds.rb
|
180
202
|
- test/test-dictionary.rb
|
181
203
|
- test/test-downloader.rb
|
182
204
|
- test/test-e-stat-japan.rb
|
183
205
|
- test/test-fashion-mnist.rb
|
206
|
+
- test/test-fuel-economy.rb
|
207
|
+
- test/test-geolonia.rb
|
184
208
|
- test/test-hepatitis.rb
|
185
209
|
- test/test-iris.rb
|
210
|
+
- test/test-ita-corpus.rb
|
211
|
+
- test/test-kuzushiji-mnist.rb
|
186
212
|
- test/test-libsvm-dataset-list.rb
|
187
213
|
- test/test-libsvm.rb
|
214
|
+
- test/test-license.rb
|
215
|
+
- test/test-livedoor-news.rb
|
216
|
+
- test/test-metadata.rb
|
188
217
|
- test/test-mnist.rb
|
189
218
|
- test/test-mushroom.rb
|
219
|
+
- test/test-nagoya-university-conversation-corpus.rb
|
190
220
|
- test/test-penguins.rb
|
191
221
|
- test/test-penn-treebank.rb
|
222
|
+
- test/test-pmjt-dataset-list.rb
|
192
223
|
- test/test-postal-code-japan.rb
|
193
|
-
- test/test-rdatasets.rb
|
194
|
-
- test/test-seaborn-data.rb
|
224
|
+
- test/test-quora-duplicate-question-pair.rb
|
225
|
+
- test/test-rdataset.rb
|
226
|
+
- test/test-seaborn.rb
|
195
227
|
- test/test-sudachi-synonym-dictionary.rb
|
196
228
|
- test/test-table.rb
|
229
|
+
- test/test-wikipedia-kyoto-japanese-english.rb
|
197
230
|
- test/test-wikipedia.rb
|
198
231
|
- test/test-wine.rb
|
199
232
|
homepage: https://github.com/red-data-tools/red-datasets
|
200
233
|
licenses:
|
201
234
|
- MIT
|
202
235
|
metadata: {}
|
203
|
-
post_install_message:
|
236
|
+
post_install_message:
|
204
237
|
rdoc_options: []
|
205
238
|
require_paths:
|
206
239
|
- lib
|
@@ -215,34 +248,49 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
215
248
|
- !ruby/object:Gem::Version
|
216
249
|
version: '0'
|
217
250
|
requirements: []
|
218
|
-
rubygems_version: 3.
|
219
|
-
signing_key:
|
251
|
+
rubygems_version: 3.5.0.dev
|
252
|
+
signing_key:
|
220
253
|
specification_version: 4
|
221
254
|
summary: Red Datasets provides classes that provide common datasets such as iris dataset.
|
222
255
|
test_files:
|
223
256
|
- test/helper.rb
|
224
257
|
- test/run-test.rb
|
225
258
|
- test/test-adult.rb
|
259
|
+
- test/test-afinn.rb
|
260
|
+
- test/test-aozora-bunko.rb
|
261
|
+
- test/test-california-housing.rb
|
226
262
|
- test/test-cifar.rb
|
227
263
|
- test/test-cldr-plurals.rb
|
228
264
|
- test/test-communities.rb
|
229
265
|
- test/test-dataset.rb
|
266
|
+
- test/test-diamonds.rb
|
230
267
|
- test/test-dictionary.rb
|
231
268
|
- test/test-downloader.rb
|
232
269
|
- test/test-e-stat-japan.rb
|
233
270
|
- test/test-fashion-mnist.rb
|
271
|
+
- test/test-fuel-economy.rb
|
272
|
+
- test/test-geolonia.rb
|
234
273
|
- test/test-hepatitis.rb
|
235
274
|
- test/test-iris.rb
|
275
|
+
- test/test-ita-corpus.rb
|
276
|
+
- test/test-kuzushiji-mnist.rb
|
236
277
|
- test/test-libsvm-dataset-list.rb
|
237
278
|
- test/test-libsvm.rb
|
279
|
+
- test/test-license.rb
|
280
|
+
- test/test-livedoor-news.rb
|
281
|
+
- test/test-metadata.rb
|
238
282
|
- test/test-mnist.rb
|
239
283
|
- test/test-mushroom.rb
|
284
|
+
- test/test-nagoya-university-conversation-corpus.rb
|
240
285
|
- test/test-penguins.rb
|
241
286
|
- test/test-penn-treebank.rb
|
287
|
+
- test/test-pmjt-dataset-list.rb
|
242
288
|
- test/test-postal-code-japan.rb
|
243
|
-
- test/test-rdatasets.rb
|
244
|
-
- test/test-seaborn-data.rb
|
289
|
+
- test/test-quora-duplicate-question-pair.rb
|
290
|
+
- test/test-rdataset.rb
|
291
|
+
- test/test-seaborn.rb
|
245
292
|
- test/test-sudachi-synonym-dictionary.rb
|
246
293
|
- test/test-table.rb
|
294
|
+
- test/test-wikipedia-kyoto-japanese-english.rb
|
247
295
|
- test/test-wikipedia.rb
|
248
296
|
- test/test-wine.rb
|
data/lib/datasets/seaborn-data.rb
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
module Datasets
|
2
|
-
class SeabornData < Dataset
|
3
|
-
URL_FORMAT = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/%{name}.csv".freeze
|
4
|
-
|
5
|
-
def initialize(name)
|
6
|
-
super()
|
7
|
-
@metadata.id = "seaborn-data-#{name}"
|
8
|
-
@metadata.name = "SeabornData: #{name}"
|
9
|
-
@metadata.url = URL_FORMAT % {name: name}
|
10
|
-
|
11
|
-
@data_path = cache_dir_path + (name + ".csv")
|
12
|
-
@name = name
|
13
|
-
end
|
14
|
-
|
15
|
-
def each(&block)
|
16
|
-
return to_enum(__method__) unless block_given?
|
17
|
-
|
18
|
-
download(@data_path, @metadata.url) unless @data_path.exist?
|
19
|
-
CSV.open(@data_path, headers: :first_row, converters: :all) do |csv|
|
20
|
-
csv.each do |row|
|
21
|
-
record = prepare_record(row)
|
22
|
-
yield record
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
private
|
28
|
-
def prepare_record(csv_row)
|
29
|
-
record = csv_row.to_h
|
30
|
-
record.transform_keys!(&:to_sym)
|
31
|
-
|
32
|
-
# Perform the same preprocessing as seaborn's load_dataset function
|
33
|
-
preprocessor = :"preprocess_#{@name}_record"
|
34
|
-
__send__(preprocessor, record) if respond_to?(preprocessor, true)
|
35
|
-
|
36
|
-
record
|
37
|
-
end
|
38
|
-
|
39
|
-
# The same preprocessing as seaborn.load_dataset
|
40
|
-
def preprocess_flights_record(record)
|
41
|
-
record[:month] &&= record[:month][0,3]
|
42
|
-
end
|
43
|
-
|
44
|
-
# The same preprocessing as seaborn.load_dataset
|
45
|
-
def preprocess_penguins_record(record)
|
46
|
-
record[:sex] &&= record[:sex].capitalize
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
data/test/test-rdatasets.rb
DELETED
@@ -1,136 +0,0 @@
|
|
1
|
-
class RdatasetsTest < Test::Unit::TestCase
|
2
|
-
sub_test_case("RdatasetsList") do
|
3
|
-
def setup
|
4
|
-
@dataset = Datasets::RdatasetsList.new
|
5
|
-
end
|
6
|
-
|
7
|
-
sub_test_case("#each") do
|
8
|
-
test("with package_name") do
|
9
|
-
records = @dataset.filter(package: "datasets").to_a
|
10
|
-
assert_equal([
|
11
|
-
84,
|
12
|
-
{
|
13
|
-
package: "datasets",
|
14
|
-
dataset: "ability.cov",
|
15
|
-
title: "Ability and Intelligence Tests",
|
16
|
-
rows: 6,
|
17
|
-
cols: 8,
|
18
|
-
n_binary: 0,
|
19
|
-
n_character: 0,
|
20
|
-
n_factor: 0,
|
21
|
-
n_logical: 0,
|
22
|
-
n_numeric: 8,
|
23
|
-
csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/ability.cov.csv",
|
24
|
-
doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/ability.cov.html"
|
25
|
-
},
|
26
|
-
{
|
27
|
-
package: "datasets",
|
28
|
-
dataset: "WWWusage",
|
29
|
-
title: "Internet Usage per Minute",
|
30
|
-
rows: 100,
|
31
|
-
cols: 2,
|
32
|
-
n_binary: 0,
|
33
|
-
n_character: 0,
|
34
|
-
n_factor: 0,
|
35
|
-
n_logical: 0,
|
36
|
-
n_numeric: 2,
|
37
|
-
csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/WWWusage.csv",
|
38
|
-
doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/WWWusage.html"
|
39
|
-
}
|
40
|
-
],
|
41
|
-
[
|
42
|
-
records.size,
|
43
|
-
records[0].to_h,
|
44
|
-
records[-1].to_h
|
45
|
-
])
|
46
|
-
end
|
47
|
-
|
48
|
-
test("without package_name") do
|
49
|
-
records = @dataset.each.to_a
|
50
|
-
assert_equal([
|
51
|
-
1714,
|
52
|
-
{
|
53
|
-
package: "AER",
|
54
|
-
dataset: "Affairs",
|
55
|
-
title: "Fair's Extramarital Affairs Data",
|
56
|
-
rows: 601,
|
57
|
-
cols: 9,
|
58
|
-
n_binary: 2,
|
59
|
-
n_character: 0,
|
60
|
-
n_factor: 2,
|
61
|
-
n_logical: 0,
|
62
|
-
n_numeric: 7,
|
63
|
-
csv: "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv",
|
64
|
-
doc: "https://vincentarelbundock.github.io/Rdatasets/doc/AER/Affairs.html"
|
65
|
-
},
|
66
|
-
{
|
67
|
-
package: "vcd",
|
68
|
-
dataset: "WomenQueue",
|
69
|
-
title: "Women in Queues",
|
70
|
-
rows: 11,
|
71
|
-
cols: 2,
|
72
|
-
n_binary: 0,
|
73
|
-
n_character: 0,
|
74
|
-
n_factor: 1,
|
75
|
-
n_logical: 0,
|
76
|
-
n_numeric: 1,
|
77
|
-
csv: "https://vincentarelbundock.github.io/Rdatasets/csv/vcd/WomenQueue.csv",
|
78
|
-
doc: "https://vincentarelbundock.github.io/Rdatasets/doc/vcd/WomenQueue.html"
|
79
|
-
},
|
80
|
-
],
|
81
|
-
[
|
82
|
-
records.size,
|
83
|
-
records[0].to_h,
|
84
|
-
records[-1].to_h
|
85
|
-
])
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
sub_test_case("Rdatasets") do
|
91
|
-
sub_test_case("datasets") do
|
92
|
-
sub_test_case("AirPassengers") do
|
93
|
-
def setup
|
94
|
-
@dataset = Datasets::Rdatasets.new("datasets", "AirPassengers")
|
95
|
-
end
|
96
|
-
|
97
|
-
test("#each") do
|
98
|
-
records = @dataset.each.to_a
|
99
|
-
assert_equal([
|
100
|
-
144,
|
101
|
-
{ time: 1949, value: 112 },
|
102
|
-
{ time: 1960.91666666667, value: 432 },
|
103
|
-
],
|
104
|
-
[
|
105
|
-
records.size,
|
106
|
-
records[0],
|
107
|
-
records[-1]
|
108
|
-
])
|
109
|
-
end
|
110
|
-
|
111
|
-
test("#metadata.id") do
|
112
|
-
assert_equal("rdatasets-datasets-AirPassengers", @dataset.metadata.id)
|
113
|
-
end
|
114
|
-
|
115
|
-
test("#metadata.description") do
|
116
|
-
description = @dataset.metadata.description
|
117
|
-
assert do
|
118
|
-
description.include?("Monthly Airline Passenger Numbers 1949-1960")
|
119
|
-
end
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
test("invalid dataset name") do
|
124
|
-
assert_raise(ArgumentError) do
|
125
|
-
Datasets::Rdatasets.new("datasets", "invalid datasets name")
|
126
|
-
end
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
test("invalid package name") do
|
131
|
-
assert_raise(ArgumentError) do
|
132
|
-
Datasets::Rdatasets.new("invalid package name", "AirPassengers")
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|
136
|
-
end
|