red-datasets 0.1.4 → 0.1.6

Files changed (74)
  1. checksums.yaml +4 -4
  2. data/README.md +23 -3
  3. data/Rakefile +56 -1
  4. data/doc/text/news.md +102 -0
  5. data/lib/datasets/adult.rb +6 -9
  6. data/lib/datasets/afinn.rb +48 -0
  7. data/lib/datasets/aozora-bunko.rb +196 -0
  8. data/lib/datasets/cache-path.rb +28 -0
  9. data/lib/datasets/california-housing.rb +60 -0
  10. data/lib/datasets/cifar.rb +2 -4
  11. data/lib/datasets/cldr-plurals.rb +2 -4
  12. data/lib/datasets/communities.rb +5 -8
  13. data/lib/datasets/dataset.rb +58 -23
  14. data/lib/datasets/diamonds.rb +26 -0
  15. data/lib/datasets/downloader.rb +110 -30
  16. data/lib/datasets/e-stat-japan.rb +2 -1
  17. data/lib/datasets/fashion-mnist.rb +4 -0
  18. data/lib/datasets/fuel-economy.rb +35 -0
  19. data/lib/datasets/geolonia.rb +67 -0
  20. data/lib/datasets/ggplot2-dataset.rb +79 -0
  21. data/lib/datasets/hepatitis.rb +5 -8
  22. data/lib/datasets/iris.rb +5 -8
  23. data/lib/datasets/ita-corpus.rb +57 -0
  24. data/lib/datasets/kuzushiji-mnist.rb +16 -0
  25. data/lib/datasets/lazy.rb +90 -0
  26. data/lib/datasets/libsvm-dataset-list.rb +5 -8
  27. data/lib/datasets/libsvm.rb +3 -4
  28. data/lib/datasets/license.rb +26 -0
  29. data/lib/datasets/livedoor-news.rb +80 -0
  30. data/lib/datasets/metadata.rb +14 -0
  31. data/lib/datasets/mnist.rb +7 -7
  32. data/lib/datasets/mushroom.rb +5 -8
  33. data/lib/datasets/nagoya-university-conversation-corpus.rb +109 -0
  34. data/lib/datasets/penguins.rb +6 -8
  35. data/lib/datasets/penn-treebank.rb +2 -4
  36. data/lib/datasets/pmjt-dataset-list.rb +67 -0
  37. data/lib/datasets/postal-code-japan.rb +2 -6
  38. data/lib/datasets/quora-duplicate-question-pair.rb +51 -0
  39. data/lib/datasets/{rdatasets.rb → rdataset.rb} +66 -15
  40. data/lib/datasets/seaborn.rb +90 -0
  41. data/lib/datasets/sudachi-synonym-dictionary.rb +5 -11
  42. data/lib/datasets/version.rb +1 -1
  43. data/lib/datasets/wikipedia-kyoto-japanese-english.rb +219 -0
  44. data/lib/datasets/wikipedia.rb +16 -8
  45. data/lib/datasets/wine.rb +6 -9
  46. data/lib/datasets/zip-extractor.rb +48 -0
  47. data/lib/datasets.rb +2 -22
  48. data/red-datasets.gemspec +1 -1
  49. data/test/helper.rb +21 -0
  50. data/test/test-afinn.rb +60 -0
  51. data/test/test-aozora-bunko.rb +190 -0
  52. data/test/test-california-housing.rb +56 -0
  53. data/test/test-cldr-plurals.rb +1 -1
  54. data/test/test-dataset.rb +15 -7
  55. data/test/test-diamonds.rb +71 -0
  56. data/test/test-fuel-economy.rb +75 -0
  57. data/test/test-geolonia.rb +65 -0
  58. data/test/test-ita-corpus.rb +69 -0
  59. data/test/test-kuzushiji-mnist.rb +137 -0
  60. data/test/test-license.rb +24 -0
  61. data/test/test-livedoor-news.rb +351 -0
  62. data/test/test-metadata.rb +36 -0
  63. data/test/test-nagoya-university-conversation-corpus.rb +132 -0
  64. data/test/test-penguins.rb +1 -1
  65. data/test/test-pmjt-dataset-list.rb +50 -0
  66. data/test/test-quora-duplicate-question-pair.rb +33 -0
  67. data/test/test-rdataset.rb +246 -0
  68. data/test/{test-seaborn-data.rb → test-seaborn.rb} +71 -4
  69. data/test/test-sudachi-synonym-dictionary.rb +5 -5
  70. data/test/test-wikipedia-kyoto-japanese-english.rb +178 -0
  71. data/test/test-wikipedia.rb +25 -71
  72. metadata +62 -14
  73. data/lib/datasets/seaborn-data.rb +0 -49
  74. data/test/test-rdatasets.rb +0 -136
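
The file list above shows the scope of this release: roughly twenty new dataset classes (AFINN, Aozora Bunko, California Housing, Kuzushiji-MNIST, Livedoor News, the Nagoya University Conversation Corpus, and others) plus shared infrastructure such as lazy class loading (lazy.rb), license and metadata handling, per-dataset cache paths (cache-path.rb), and a ZIP extractor. All datasets share the same enumerable interface that the new tests below exercise. A minimal orientation sketch, assuming the long-standing Iris reader's record layout (output values are illustrative, not asserted anywhere in this diff):

    require "datasets"

    iris = Datasets::Iris.new
    iris.each do |record|
      # Records are Struct-like; #to_h yields a plain Hash of attributes.
      p record.to_h
      break
    end
    table = iris.to_table      # columnar view over all records
    p iris.metadata.id         # stable identifier string
    p iris.metadata.licenses   # normalized via the new License class
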
--- /dev/null
+++ b/data/test/test-kuzushiji-mnist.rb
@@ -0,0 +1,137 @@
+class KuzushijiMNISTTest < Test::Unit::TestCase
+  sub_test_case("Normal") do
+    sub_test_case("train") do
+      def setup
+        @dataset = Datasets::KuzushijiMNIST.new(type: :train)
+      end
+
+      test("#each") do
+        records = @dataset.each.to_a
+        assert_equal([
+                       60000,
+                       [
+                         8,
+                         784,
+                         [213, 233, 255, 186, 2, 0, 0, 0, 0, 0],
+                         [0, 0, 0, 0, 0, 0, 0, 0, 45, 252],
+                       ],
+                       [
+                         9,
+                         784,
+                         [81, 246, 254, 155, 224, 255, 230, 39, 0, 0],
+                         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                       ],
+                     ],
+                     [
+                       records.size,
+                       [
+                         records[0].label,
+                         records[0].pixels.size,
+                         records[0].pixels[400, 10],
+                         records[0].pixels[500, 10],
+                       ],
+                       [
+                         records[-1].label,
+                         records[-1].pixels.size,
+                         records[-1].pixels[400, 10],
+                         records[-1].pixels[500, 10],
+                       ],
+                     ])
+      end
+
+      test("#to_table") do
+        table_data = @dataset.to_table
+        assert_equal([
+                       [213, 233, 255, 186, 2, 0, 0, 0, 0, 0],
+                       [81, 246, 254, 155, 224, 255, 230, 39, 0, 0],
+                     ],
+                     [
+                       table_data[:pixels][0][400, 10],
+                       table_data[:pixels][-1][400, 10],
+                     ])
+      end
+
+      sub_test_case("#metadata") do
+        test("#id") do
+          assert_equal("kuzushiji-mnist-train", @dataset.metadata.id)
+        end
+
+        test("#name") do
+          assert_equal("Kuzushiji-MNIST: train", @dataset.metadata.name)
+        end
+      end
+    end
+
+    sub_test_case("test") do
+      def setup
+        @dataset = Datasets::KuzushijiMNIST.new(type: :test)
+      end
+
+      test("#each") do
+        records = @dataset.each.to_a
+        assert_equal([
+                       10000,
+                       [
+                         2,
+                         784,
+                         [0, 0, 0, 0, 0, 0, 0, 0, 0, 75],
+                         [44, 255, 255, 246, 119, 252, 46, 0, 70, 255],
+                       ],
+                       [
+                         2,
+                         784,
+                         [0, 0, 0, 0, 0, 0, 0, 84, 255, 192],
+                         [0, 0, 0, 0, 0, 23, 245, 92, 42, 254],
+                       ],
+                     ],
+                     [
+                       records.size,
+                       [
+                         records[0].label,
+                         records[0].pixels.size,
+                         records[0].pixels[400, 10],
+                         records[0].pixels[500, 10],
+                       ],
+                       [
+                         records[-1].label,
+                         records[-1].pixels.size,
+                         records[-1].pixels[400, 10],
+                         records[-1].pixels[500, 10],
+                       ],
+                     ])
+      end
+
+      test("#to_table") do
+        table_data = @dataset.to_table
+        assert_equal([
+                       [0, 0, 0, 0, 0, 0, 0, 0, 0, 75],
+                       [0, 0, 0, 0, 0, 0, 0, 84, 255, 192],
+                     ],
+                     [
+                       table_data[:pixels][0][400, 10],
+                       table_data[:pixels][-1][400, 10],
+                     ])
+      end
+
+      sub_test_case("#metadata") do
+        test("#id") do
+          assert_equal("kuzushiji-mnist-test", @dataset.metadata.id)
+        end
+
+        test("#name") do
+          assert_equal("Kuzushiji-MNIST: test", @dataset.metadata.name)
+        end
+      end
+    end
+  end
+
+  sub_test_case("Abnormal") do
+    test("invalid type") do
+      invalid_type = :invalid
+      message = "Please set type :train or :test: #{invalid_type.inspect}"
+      assert_raise(ArgumentError.new(message)) do
+        Datasets::KuzushijiMNIST.new(type: invalid_type)
+      end
+    end
+  end
+end
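
The assertions above pin down the Kuzushiji-MNIST reader's shape: 60,000 train and 10,000 test records, each carrying a class label and 784 (28×28) grayscale pixel values, with to_table exposing the same data column-wise. A minimal usage sketch based solely on those assertions:

    require "datasets"

    train = Datasets::KuzushijiMNIST.new(type: :train)
    record = train.each.first
    record.label            # class index; 8 for the first train record above
    record.pixels.size      # => 784
    record.pixels[400, 10]  # => [213, 233, 255, 186, 2, 0, 0, 0, 0, 0]

    train.to_table[:pixels] # per-record pixel arrays, column-wise

    # Any type other than :train or :test raises ArgumentError.
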
--- /dev/null
+++ b/data/test/test-license.rb
@@ -0,0 +1,24 @@
+class LicenseTest < Test::Unit::TestCase
+  sub_test_case(".try_convert") do
+    test("String") do
+      assert_equal(Datasets::License.new("Apache-2.0"),
+                   Datasets::License.try_convert("Apache-2.0"))
+    end
+
+    test("{spdx_id:}") do
+      assert_equal(Datasets::License.new("Apache-2.0"),
+                   Datasets::License.try_convert(spdx_id: "Apache-2.0"))
+    end
+
+    test("{name:, url:}") do
+      license = {
+        name: "Quora's Terms of Service",
+        url: "https://www.quora.com/about/tos",
+      }
+      assert_equal(Datasets::License.new(nil,
+                                         "Quora's Terms of Service",
+                                         "https://www.quora.com/about/tos"),
+                   Datasets::License.try_convert(license))
+    end
+  end
+end
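
These cases document the three input shapes License.try_convert accepts. A hedged summary, inferring from the assertions that License.new's positional arguments are (spdx_id, name, url):

    require "datasets"

    # SPDX id as a String:
    Datasets::License.try_convert("Apache-2.0")
    # {spdx_id:} Hash, equivalent to the String form:
    Datasets::License.try_convert(spdx_id: "Apache-2.0")
    # {name:, url:} Hash for licenses without an SPDX id:
    Datasets::License.try_convert(name: "Quora's Terms of Service",
                                  url: "https://www.quora.com/about/tos")
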
--- /dev/null
+++ b/data/test/test-livedoor-news.rb
@@ -0,0 +1,351 @@
+class LivedoorNewsTest < Test::Unit::TestCase
+  sub_test_case("type") do
+    test("topic_news") do
+      dataset = Datasets::LivedoorNews.new(type: :topic_news)
+      records = dataset.to_a
+      assert_equal([
+                     770,
+                     [
+                       "http://news.livedoor.com/article/detail/5903225/",
+                       Time.iso8601("2011-10-02T10:00:00+0900"),
+                       "悪評が次から次へと溢",
+                       "/5571502/\n"
+                     ],
+                     [
+                       "http://news.livedoor.com/article/detail/6918105/",
+                       Time.iso8601("2012-09-04T12:45:00+0900"),
+                       "ジャンプ連載漫画が終",
+                       "提案も散見された。\n"
+                     ],
+                   ],
+                   [
+                     records.size,
+                     [
+                       records[0].url,
+                       records[0].timestamp,
+                       records[0].sentence[0, 10],
+                       records[0].sentence[-10, 10]
+                     ],
+                     [
+                       records[-1].url,
+                       records[-1].timestamp,
+                       records[-1].sentence[0, 10],
+                       records[-1].sentence[-10, 10]
+                     ],
+                   ])
+    end
+
+    test("sports_watch") do
+      dataset = Datasets::LivedoorNews.new(type: :sports_watch)
+      records = dataset.to_a
+      assert_equal([
+                     900,
+                     [
+                       "http://news.livedoor.com/article/detail/4597641/",
+                       Time.iso8601("2010-02-10T10:50:00+0900"),
+                       "【Sports Wa",
+                       "送る秋山であった。\n"
+                     ],
+                     [
+                       "http://news.livedoor.com/article/detail/6917848/",
+                       Time.iso8601("2012-09-04T11:25:00+0900"),
+                       "ジーコ、本田圭佑につ",
+                       "る」と語っている。\n"
+                     ],
+                   ],
+                   [
+                     records.size,
+                     [
+                       records[0].url,
+                       records[0].timestamp,
+                       records[0].sentence[0, 10],
+                       records[0].sentence[-10, 10]
+                     ],
+                     [
+                       records[-1].url,
+                       records[-1].timestamp,
+                       records[-1].sentence[0, 10],
+                       records[-1].sentence[-10, 10]
+                     ],
+                   ])
+    end
+
+    test("it_life_hack") do
+      dataset = Datasets::LivedoorNews.new(type: :it_life_hack)
+      records = dataset.to_a
+      assert_equal([
+                     870,
+                     [
+                       "http://news.livedoor.com/article/detail/6292880/",
+                       Time.iso8601("2012-02-19T13:00:00+0900"),
+                       "旧式Macで禁断のパ",
+                       "p\n" + "クチコミを見る\n"
+                     ],
+                     [
+                       "http://news.livedoor.com/article/detail/6918825/",
+                       Time.iso8601("2012-09-04T15:00:00+0900"),
+                       "レノボWindows",
+                       "J\n" + "クチコミを見る\n"
+                     ],
+                   ],
+                   [
+                     records.size,
+                     [
+                       records[0].url,
+                       records[0].timestamp,
+                       records[0].sentence[0, 10],
+                       records[0].sentence[-10, 10]
+                     ],
+                     [
+                       records[-1].url,
+                       records[-1].timestamp,
+                       records[-1].sentence[0, 10],
+                       records[-1].sentence[-10, 10]
+                     ],
+                   ])
+    end
+
+    test("kaden_channel") do
+      dataset = Datasets::LivedoorNews.new(type: :kaden_channel)
+      records = dataset.to_a
+      assert_equal([
+                     864,
+                     [
+                       "http://news.livedoor.com/article/detail/5774093/",
+                       Time.iso8601("2011-08-10T10:00:00+0900"),
+                       "【ニュース】電力使用",
+                       "に備える【デジ通】\n"
+                     ],
+                     [
+                       "http://news.livedoor.com/article/detail/6919353/",
+                       Time.iso8601("2012-09-04T17:00:00+0900"),
+                       "Hulu、ついに待望",
+                       "uに今後も注目だ。\n"
+                     ],
+                   ],
+                   [
+                     records.size,
+                     [
+                       records[0].url,
+                       records[0].timestamp,
+                       records[0].sentence[0, 10],
+                       records[0].sentence[-10, 10]
+                     ],
+                     [
+                       records[-1].url,
+                       records[-1].timestamp,
+                       records[-1].sentence[0, 10],
+                       records[-1].sentence[-10, 10]
+                     ],
+                   ])
+    end
+
+    test("movie_enter") do
+      dataset = Datasets::LivedoorNews.new(type: :movie_enter)
+      records = dataset.to_a
+      assert_equal([
+                     870,
+                     [
+                       "http://news.livedoor.com/article/detail/5840081/",
+                       Time.iso8601("2011-09-08T10:00:00+0900"),
+                       "インタビュー:宮崎あ",
+                       "ない。 1 2 3\n"
+                     ],
+                     [
+                       "http://news.livedoor.com/article/detail/6909318/",
+                       Time.iso8601("2012-09-01T10:30:00+0900"),
+                       "【週末映画まとめ読み",
+                       "レイ+DVDセット\n"
+                     ],
+                   ],
+                   [
+                     records.size,
+                     [
+                       records[0].url,
+                       records[0].timestamp,
+                       records[0].sentence[0, 10],
+                       records[0].sentence[-10, 10]
+                     ],
+                     [
+                       records[-1].url,
+                       records[-1].timestamp,
+                       records[-1].sentence[0, 10],
+                       records[-1].sentence[-10, 10]
+                     ],
+                   ])
+    end
+
+    test("dokujo_tsushin") do
+      dataset = Datasets::LivedoorNews.new(type: :dokujo_tsushin)
+      records = dataset.to_a
+      assert_equal([
+                     870,
+                     [
+                       "http://news.livedoor.com/article/detail/4778030/",
+                       Time.iso8601("2010-05-22T14:30:00+0900"),
+                       "友人代表のスピーチ、",
+                       "も幸あれ(高山惠)\n"
+                     ],
+                     [
+                       "http://news.livedoor.com/article/detail/6915005/",
+                       Time.iso8601("2012-09-03T14:00:00+0900"),
+                       "男女間で“カワイイ”",
+                       "ツー/神田はるひ)\n"
+                     ],
+                   ],
+                   [
+                     records.size,
+                     [
+                       records[0].url,
+                       records[0].timestamp,
+                       records[0].sentence[0, 10],
+                       records[0].sentence[-10, 10]
+                     ],
+                     [
+                       records[-1].url,
+                       records[-1].timestamp,
+                       records[-1].sentence[0, 10],
+                       records[-1].sentence[-10, 10]
+                     ],
+                   ])
+    end
+
+    test("smax") do
+      dataset = Datasets::LivedoorNews.new(type: :smax)
+      records = dataset.to_a
+      assert_equal([
+                     870,
+                     [
+                       "http://news.livedoor.com/article/detail/6507397/",
+                       Time.iso8601("2012-04-26T16:55:00+0900"),
+                       "あのアプリもこのアプ",
+                       "n Twitter\n"
+                     ],
+                     [
+                       "http://news.livedoor.com/article/detail/6919324/",
+                       Time.iso8601("2012-09-04T16:55:00+0900"),
+                       "【究極にカスタマイズ",
+                       "個人) : 富士通\n"
+                     ],
+                   ],
+                   [
+                     records.size,
+                     [
+                       records[0].url,
+                       records[0].timestamp,
+                       records[0].sentence[0, 10],
+                       records[0].sentence[-10, 10]
+                     ],
+                     [
+                       records[-1].url,
+                       records[-1].timestamp,
+                       records[-1].sentence[0, 10],
+                       records[-1].sentence[-10, 10]
+                     ],
+                   ])
+    end
+
+    test("livedoor_homme") do
+      dataset = Datasets::LivedoorNews.new(type: :livedoor_homme)
+      records = dataset.to_a
+      assert_equal([
+                     511,
+                     [
+                       "http://news.livedoor.com/article/detail/4568088/",
+                       Time.iso8601("2010-01-24T18:10:00+0900"),
+                       "フォーエバー21旗艦",
+                       "フォーエバー21」\n"
+                     ],
+                     [
+                       "http://news.livedoor.com/article/detail/6828491/",
+                       Time.iso8601("2012-08-06T14:30:00+0900"),
+                       "【女子座談会】ぶっち",
+                       "タートキャンペーン\n"
+                     ],
+                   ],
+                   [
+                     records.size,
+                     [
+                       records[0].url,
+                       records[0].timestamp,
+                       records[0].sentence[0, 10],
+                       records[0].sentence[-10, 10]
+                     ],
+                     [
+                       records[-1].url,
+                       records[-1].timestamp,
+                       records[-1].sentence[0, 10],
+                       records[-1].sentence[-10, 10]
+                     ],
+                   ])
+    end
+
+    test("peachy") do
+      dataset = Datasets::LivedoorNews.new(type: :peachy)
+      records = dataset.to_a
+      assert_equal([
+                     842,
+                     [
+                       "http://news.livedoor.com/article/detail/4289213/",
+                       Time.iso8601("2009-08-07T20:30:00+0900"),
+                       "韓国の伝統菓子を食べ",
+                       "試してみませんか?\n"
+                     ],
+                     [
+                       "http://news.livedoor.com/article/detail/6908055/",
+                       Time.iso8601("2012-09-01T18:00:00+0900"),
+                       "初デートで彼を悶絶さ",
+                       "hyアプリの使い方\n"
+                     ],
+                   ],
+                   [
+                     records.size,
+                     [
+                       records[0].url,
+                       records[0].timestamp,
+                       records[0].sentence[0, 10],
+                       records[0].sentence[-10, 10]
+                     ],
+                     [
+                       records[-1].url,
+                       records[-1].timestamp,
+                       records[-1].sentence[0, 10],
+                       records[-1].sentence[-10, 10]
+                     ],
+                   ])
+    end
+
+    test("invalid") do
+      message = ":type must be one of [" +
+                ":topic_news, " +
+                ":sports_watch, " +
+                ":it_life_hack, " +
+                ":kaden_channel, " +
+                ":movie_enter, " +
+                ":dokujo_tsushin, " +
+                ":smax, " +
+                ":livedoor_homme, " +
+                ":peachy" +
+                "]: :invalid"
+      assert_raise(ArgumentError.new(message)) do
+        Datasets::LivedoorNews.new(type: :invalid)
+      end
+    end
+  end
+
+  sub_test_case("#metadata") do
+    test("#description") do
+      dataset = Datasets::LivedoorNews.new(type: :topic_news)
+      description = dataset.metadata.description
+      assert_equal([
+                     "livedoor ニ",
+                     "に感謝いたします。\n"
+                   ],
+                   [
+                     description[0, 10],
+                     description[-10, 10]
+                   ],
+                   description)
+    end
+  end
+end
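
Each Livedoor News category is a separate corpus selected via :type; the nine tests above fix the record counts per category (from 511 for :livedoor_homme up to 900 for :sports_watch) and show that records expose #url, #timestamp (a Time), and the article body as #sentence. A minimal sketch:

    require "datasets"

    news = Datasets::LivedoorNews.new(type: :topic_news)
    news.each do |record|
      record.url             # source article URL
      record.timestamp       # Time, ISO 8601 with +09:00 offset
      record.sentence[0, 10] # beginning of the article body
      break
    end

    # An unknown :type raises ArgumentError listing all nine valid symbols.
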
--- /dev/null
+++ b/data/test/test-metadata.rb
@@ -0,0 +1,36 @@
+class MetadataTest < Test::Unit::TestCase
+  def setup
+    @metadata = Datasets::Metadata.new
+  end
+
+  sub_test_case("#licenses") do
+    test("String") do
+      @metadata.licenses = "Apache-2.0"
+      assert_equal([Datasets::License.new("Apache-2.0")],
+                   @metadata.licenses)
+    end
+
+    test("[String]") do
+      @metadata.licenses = ["Apache-2.0"]
+      assert_equal([Datasets::License.new("Apache-2.0")],
+                   @metadata.licenses)
+    end
+
+    test("{name:, url:}") do
+      @metadata.licenses = {
+        name: "Quora's Terms of Service",
+        url: "https://www.quora.com/about/tos",
+      }
+      assert_equal([Datasets::License.new(nil,
+                                          "Quora's Terms of Service",
+                                          "https://www.quora.com/about/tos")],
+                   @metadata.licenses)
+    end
+
+    test("Symbol") do
+      assert_raise(ArgumentError.new("invalid license: :apache_2_0")) do
+        @metadata.licenses = :apache_2_0
+      end
+    end
+  end
+end
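
The licenses= writer normalizes every accepted form to an array of Datasets::License, so dataset authors can declare licenses loosely. A short sketch of the four cases tested above:

    metadata = Datasets::Metadata.new

    metadata.licenses = "Apache-2.0"    # single SPDX id String
    metadata.licenses = ["Apache-2.0"]  # Array of ids
    metadata.licenses = {
      name: "Quora's Terms of Service",
      url: "https://www.quora.com/about/tos",
    }
    # Unconvertible input (e.g. a Symbol) raises ArgumentError.
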
--- /dev/null
+++ b/data/test/test-nagoya-university-conversation-corpus.rb
@@ -0,0 +1,132 @@
+class NagoyaUniversityConversationCorpusTest < Test::Unit::TestCase
+  def setup
+    @dataset = Datasets::NagoyaUniversityConversationCorpus.new
+  end
+
+  sub_test_case("each") do
+    test("#sentences") do
+      records = @dataset.each.to_a
+      first_sentences = records[0].sentences
+      last_sentences = records[-1].sentences
+      assert_equal([
+                     856,
+                     {
+                       participant_id: 'F107',
+                       content: '***の町というのはちいちゃくって、城壁がこう町全体をぐるっと回ってて、それが城壁の上を歩いても1時間ぐらいですよね。'
+                     },
+                     {
+                       participant_id: nil,
+                       content: nil
+                     },
+                     603,
+                     {
+                       participant_id: 'F007',
+                       content: 'それでは話を始めまーす。'
+                     },
+                     {
+                       participant_id: nil,
+                       content: nil
+                     }
+                   ],
+                   [
+                     first_sentences.size,
+                     first_sentences[0].to_h,
+                     first_sentences[-1].to_h,
+                     last_sentences.size,
+                     last_sentences[0].to_h,
+                     last_sentences[-1].to_h,
+                   ])
+    end
+
+    test("#participants") do
+      records = @dataset.each.to_a
+      first_participants = records[0].participants
+      last_participants = records[-1].participants
+      assert_equal([
+                     4,
+                     {
+                       id: 'F107',
+                       attribute: '女性30代後半',
+                       birthplace: '愛知県幡豆郡出身',
+                       residence: '愛知県幡豆郡在住'
+                     },
+                     {
+                       id: 'F128',
+                       attribute: '女性20代前半',
+                       birthplace: '愛知県西尾市出身',
+                       residence: '西尾市在住'
+                     },
+                     2,
+                     {
+                       id: 'F007',
+                       attribute: '女性50代後半',
+                       birthplace: '東京都出身',
+                       residence: '東京都国分寺市在住'
+                     },
+                     {
+                       id: 'F003',
+                       attribute: '女性80代後半',
+                       birthplace: '栃木県宇都宮市出身',
+                       residence: '国分寺市在住'
+                     }
+                   ],
+                   [
+                     first_participants.size,
+                     first_participants[0].to_h,
+                     first_participants[-1].to_h,
+                     last_participants.size,
+                     last_participants[0].to_h,
+                     last_participants[-1].to_h
+                   ])
+    end
+
+    test("others") do
+      records = @dataset.each.to_a
+      assert_equal([
+                     129,
+                     [
+                       '1(約35分)',
+                       '2001年10月16日',
+                       'ファミリーレストラン',
+                       '英会話教室の友人',
+                       nil
+                     ],
+                     [
+                       '129(36分)',
+                       '2003年2月16日',
+                       '二人の自宅',
+                       '母と娘',
+                       'F007は東京に38年、F003は東京に60年居住。'
+                     ]
+                   ],
+                   [
+                     records.size,
+                     [
+                       records[0].name,
+                       records[0].date,
+                       records[0].place,
+                       records[0].relationships,
+                       records[0].note
+                     ],
+                     [
+                       records[-1].name,
+                       records[-1].date,
+                       records[-1].place,
+                       records[-1].relationships,
+                       records[-1].note
+                     ]
+                   ])
+    end
+  end
+
+  sub_test_case("#metadata") do
+    test("#description") do
+      description = @dataset.metadata.description
+      assert_equal(<<~DESCRIPTION, description)
+        The "Nagoya University Conversation Corpus" is a corpus of 129 conversations,
+        total about 100 hours of chatting among native speakers of Japanese,
+        which is converted into text.
+      DESCRIPTION
+    end
+  end
+end
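
The corpus reader yields one record per conversation (129 in total). Each record bundles the transcript (#sentences, participant-tagged lines with a trailing nil-content sentinel per conversation), the speaker roster (#participants with id, attribute, birthplace, and residence), and session details (#name, #date, #place, #relationships, #note). A minimal sketch built from those accessors:

    require "datasets"

    corpus = Datasets::NagoyaUniversityConversationCorpus.new
    conversation = corpus.each.first
    conversation.name                 # => "1(約35分)"
    conversation.participants[0].id   # => "F107"
    conversation.sentences[0].content # first utterance of the conversation
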
--- a/data/test/test-penguins.rb
+++ b/data/test/test-penguins.rb
@@ -7,7 +7,7 @@ class PenguinsTest < Test::Unit::TestCase
       dataset = cls.new
       dataset.data_path.relative_path_from(dataset.send(:cache_dir_path)).to_s
     }
-    assert_equal(["penguins/adelie.csv", "penguins/gentoo.csv", "penguins/chinstrap.csv"],
+    assert_equal(["adelie.csv", "gentoo.csv", "chinstrap.csv"],
                  data_paths)
   end
 end
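
This updated expectation follows from the new per-dataset cache layout (data/lib/datasets/cache-path.rb in the file list above): each dataset now caches its files directly under its own cache directory, so the path relative to that directory is just the file name, without the former "penguins/" prefix.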