red-datasets 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -3
  3. data/Rakefile +56 -1
  4. data/doc/text/news.md +102 -0
  5. data/lib/datasets/adult.rb +6 -9
  6. data/lib/datasets/afinn.rb +48 -0
  7. data/lib/datasets/aozora-bunko.rb +196 -0
  8. data/lib/datasets/cache-path.rb +28 -0
  9. data/lib/datasets/california-housing.rb +60 -0
  10. data/lib/datasets/cifar.rb +2 -4
  11. data/lib/datasets/cldr-plurals.rb +2 -4
  12. data/lib/datasets/communities.rb +5 -8
  13. data/lib/datasets/dataset.rb +58 -23
  14. data/lib/datasets/diamonds.rb +26 -0
  15. data/lib/datasets/downloader.rb +110 -30
  16. data/lib/datasets/e-stat-japan.rb +2 -1
  17. data/lib/datasets/fashion-mnist.rb +4 -0
  18. data/lib/datasets/fuel-economy.rb +35 -0
  19. data/lib/datasets/geolonia.rb +67 -0
  20. data/lib/datasets/ggplot2-dataset.rb +79 -0
  21. data/lib/datasets/hepatitis.rb +5 -8
  22. data/lib/datasets/iris.rb +5 -8
  23. data/lib/datasets/ita-corpus.rb +57 -0
  24. data/lib/datasets/kuzushiji-mnist.rb +16 -0
  25. data/lib/datasets/lazy.rb +90 -0
  26. data/lib/datasets/libsvm-dataset-list.rb +5 -8
  27. data/lib/datasets/libsvm.rb +3 -4
  28. data/lib/datasets/license.rb +26 -0
  29. data/lib/datasets/livedoor-news.rb +80 -0
  30. data/lib/datasets/metadata.rb +14 -0
  31. data/lib/datasets/mnist.rb +7 -7
  32. data/lib/datasets/mushroom.rb +5 -8
  33. data/lib/datasets/nagoya-university-conversation-corpus.rb +109 -0
  34. data/lib/datasets/penguins.rb +6 -8
  35. data/lib/datasets/penn-treebank.rb +2 -4
  36. data/lib/datasets/pmjt-dataset-list.rb +67 -0
  37. data/lib/datasets/postal-code-japan.rb +2 -6
  38. data/lib/datasets/quora-duplicate-question-pair.rb +51 -0
  39. data/lib/datasets/{rdatasets.rb → rdataset.rb} +66 -15
  40. data/lib/datasets/seaborn.rb +90 -0
  41. data/lib/datasets/sudachi-synonym-dictionary.rb +5 -11
  42. data/lib/datasets/version.rb +1 -1
  43. data/lib/datasets/wikipedia-kyoto-japanese-english.rb +219 -0
  44. data/lib/datasets/wikipedia.rb +16 -8
  45. data/lib/datasets/wine.rb +6 -9
  46. data/lib/datasets/zip-extractor.rb +48 -0
  47. data/lib/datasets.rb +2 -22
  48. data/red-datasets.gemspec +1 -1
  49. data/test/helper.rb +21 -0
  50. data/test/test-afinn.rb +60 -0
  51. data/test/test-aozora-bunko.rb +190 -0
  52. data/test/test-california-housing.rb +56 -0
  53. data/test/test-cldr-plurals.rb +1 -1
  54. data/test/test-dataset.rb +15 -7
  55. data/test/test-diamonds.rb +71 -0
  56. data/test/test-fuel-economy.rb +75 -0
  57. data/test/test-geolonia.rb +65 -0
  58. data/test/test-ita-corpus.rb +69 -0
  59. data/test/test-kuzushiji-mnist.rb +137 -0
  60. data/test/test-license.rb +24 -0
  61. data/test/test-livedoor-news.rb +351 -0
  62. data/test/test-metadata.rb +36 -0
  63. data/test/test-nagoya-university-conversation-corpus.rb +132 -0
  64. data/test/test-penguins.rb +1 -1
  65. data/test/test-pmjt-dataset-list.rb +50 -0
  66. data/test/test-quora-duplicate-question-pair.rb +33 -0
  67. data/test/test-rdataset.rb +246 -0
  68. data/test/{test-seaborn-data.rb → test-seaborn.rb} +71 -4
  69. data/test/test-sudachi-synonym-dictionary.rb +5 -5
  70. data/test/test-wikipedia-kyoto-japanese-english.rb +178 -0
  71. data/test/test-wikipedia.rb +25 -71
  72. metadata +62 -14
  73. data/lib/datasets/seaborn-data.rb +0 -49
  74. data/test/test-rdatasets.rb +0 -136
@@ -0,0 +1,137 @@
# Exercises Datasets::KuzushijiMNIST for the :train and :test types and checks
# that an unsupported :type raises ArgumentError.
class KuzushijiMNISTTest < Test::Unit::TestCase
  sub_test_case("Normal") do
    sub_test_case("train") do
      def setup
        @dataset = Datasets::KuzushijiMNIST.new(type: :train)
      end

      test("#each") do
        all_records = @dataset.each.to_a
        expected = [
          60000,
          [
            8,
            784,
            [213, 233, 255, 186, 2, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 45, 252],
          ],
          [
            9,
            784,
            [81, 246, 254, 155, 224, 255, 230, 39, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
          ],
        ]
        assert_equal(expected, summarize_records(all_records))
      end

      test("#to_table") do
        table = @dataset.to_table
        expected = [
          [213, 233, 255, 186, 2, 0, 0, 0, 0, 0],
          [81, 246, 254, 155, 224, 255, 230, 39, 0, 0],
        ]
        actual = [
          table[:pixels][0][400, 10],
          table[:pixels][-1][400, 10],
        ]
        assert_equal(expected, actual)
      end

      sub_test_case("#metadata") do
        test("#id") do
          assert_equal("kuzushiji-mnist-train", @dataset.metadata.id)
        end

        test("#name") do
          assert_equal("Kuzushiji-MNIST: train", @dataset.metadata.name)
        end
      end
    end

    sub_test_case("test") do
      def setup
        @dataset = Datasets::KuzushijiMNIST.new(type: :test)
      end

      test("#each") do
        all_records = @dataset.each.to_a
        expected = [
          10000,
          [
            2,
            784,
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 75],
            [44, 255, 255, 246, 119, 252, 46, 0, 70, 255],
          ],
          [
            2,
            784,
            [0, 0, 0, 0, 0, 0, 0, 84, 255, 192],
            [0, 0, 0, 0, 0, 23, 245, 92, 42, 254],
          ],
        ]
        assert_equal(expected, summarize_records(all_records))
      end

      test("#to_table") do
        table = @dataset.to_table
        expected = [
          [0, 0, 0, 0, 0, 0, 0, 0, 0, 75],
          [0, 0, 0, 0, 0, 0, 0, 84, 255, 192],
        ]
        actual = [
          table[:pixels][0][400, 10],
          table[:pixels][-1][400, 10],
        ]
        assert_equal(expected, actual)
      end

      sub_test_case("#metadata") do
        test("#id") do
          assert_equal("kuzushiji-mnist-test", @dataset.metadata.id)
        end

        test("#name") do
          assert_equal("Kuzushiji-MNIST: test", @dataset.metadata.name)
        end
      end
    end
  end

  sub_test_case("Abnormal") do
    test("invalid type") do
      bad_type = :invalid
      expected_message = "Please set type :train or :test: #{bad_type.inspect}"
      assert_raise(ArgumentError.new(expected_message)) do
        Datasets::KuzushijiMNIST.new(type: bad_type)
      end
    end
  end

  private

  # Builds [record count, summary of the first record, summary of the last
  # record] so a whole dataset can be compared with one assertion.
  def summarize_records(records)
    [
      records.size,
      summarize_record(records[0]),
      summarize_record(records[-1]),
    ]
  end

  # Reduces one record to its label, its pixel count, and two 10-element
  # pixel slices starting at offsets 400 and 500.
  def summarize_record(record)
    [
      record.label,
      record.pixels.size,
      record.pixels[400, 10],
      record.pixels[500, 10],
    ]
  end
end
@@ -0,0 +1,24 @@
# Checks that Datasets::License.try_convert accepts each input form used
# below and yields a License equal to the explicitly constructed one.
class LicenseTest < Test::Unit::TestCase
  sub_test_case(".try_convert") do
    test("String") do
      expected = Datasets::License.new("Apache-2.0")
      converted = Datasets::License.try_convert("Apache-2.0")
      assert_equal(expected, converted)
    end

    test("{spdx_id:}") do
      expected = Datasets::License.new("Apache-2.0")
      converted = Datasets::License.try_convert(spdx_id: "Apache-2.0")
      assert_equal(expected, converted)
    end

    test("{name:, url:}") do
      terms = {
        name: "Quora's Terms of Service",
        url: "https://www.quora.com/about/tos",
      }
      # No SPDX ID is given for this license, hence the leading nil.
      expected = Datasets::License.new(nil,
                                       "Quora's Terms of Service",
                                       "https://www.quora.com/about/tos")
      assert_equal(expected, Datasets::License.try_convert(terms))
    end
  end
end
@@ -0,0 +1,351 @@
# Tests for Datasets::LivedoorNews: one case per supported :type, a rejection
# case for an unknown :type, and a check of the metadata description.
#
# Each per-type case compares the record count plus the URL, timestamp and the
# first/last 10 characters of the sentence for the first and last records.
class LivedoorNewsTest < Test::Unit::TestCase
  sub_test_case("type") do
    test("topic_news") do
      articles = Datasets::LivedoorNews.new(type: :topic_news).to_a
      expected = [
        770,
        [
          "http://news.livedoor.com/article/detail/5903225/",
          Time.iso8601("2011-10-02T10:00:00+0900"),
          "悪評が次から次へと溢",
          "/5571502/\n",
        ],
        [
          "http://news.livedoor.com/article/detail/6918105/",
          Time.iso8601("2012-09-04T12:45:00+0900"),
          "ジャンプ連載漫画が終",
          "提案も散見された。\n",
        ],
      ]
      assert_equal(expected, digest_records(articles))
    end

    test("sports_watch") do
      articles = Datasets::LivedoorNews.new(type: :sports_watch).to_a
      expected = [
        900,
        [
          "http://news.livedoor.com/article/detail/4597641/",
          Time.iso8601("2010-02-10T10:50:00+0900"),
          "【Sports Wa",
          "送る秋山であった。\n",
        ],
        [
          "http://news.livedoor.com/article/detail/6917848/",
          Time.iso8601("2012-09-04T11:25:00+0900"),
          "ジーコ、本田圭佑につ",
          "る」と語っている。\n",
        ],
      ]
      assert_equal(expected, digest_records(articles))
    end

    test("it_life_hack") do
      articles = Datasets::LivedoorNews.new(type: :it_life_hack).to_a
      expected = [
        870,
        [
          "http://news.livedoor.com/article/detail/6292880/",
          Time.iso8601("2012-02-19T13:00:00+0900"),
          "旧式Macで禁断のパ",
          "p\n" + "クチコミを見る\n",
        ],
        [
          "http://news.livedoor.com/article/detail/6918825/",
          Time.iso8601("2012-09-04T15:00:00+0900"),
          "レノボWindows",
          "J\n" + "クチコミを見る\n",
        ],
      ]
      assert_equal(expected, digest_records(articles))
    end

    test("kaden_channel") do
      articles = Datasets::LivedoorNews.new(type: :kaden_channel).to_a
      expected = [
        864,
        [
          "http://news.livedoor.com/article/detail/5774093/",
          Time.iso8601("2011-08-10T10:00:00+0900"),
          "【ニュース】電力使用",
          "に備える【デジ通】\n",
        ],
        [
          "http://news.livedoor.com/article/detail/6919353/",
          Time.iso8601("2012-09-04T17:00:00+0900"),
          "Hulu、ついに待望",
          "uに今後も注目だ。\n",
        ],
      ]
      assert_equal(expected, digest_records(articles))
    end

    test("movie_enter") do
      articles = Datasets::LivedoorNews.new(type: :movie_enter).to_a
      expected = [
        870,
        [
          "http://news.livedoor.com/article/detail/5840081/",
          Time.iso8601("2011-09-08T10:00:00+0900"),
          "インタビュー:宮崎あ",
          "ない。 1 2 3\n",
        ],
        [
          "http://news.livedoor.com/article/detail/6909318/",
          Time.iso8601("2012-09-01T10:30:00+0900"),
          "【週末映画まとめ読み",
          "レイ+DVDセット\n",
        ],
      ]
      assert_equal(expected, digest_records(articles))
    end

    test("dokujo_tsushin") do
      articles = Datasets::LivedoorNews.new(type: :dokujo_tsushin).to_a
      expected = [
        870,
        [
          "http://news.livedoor.com/article/detail/4778030/",
          Time.iso8601("2010-05-22T14:30:00+0900"),
          "友人代表のスピーチ、",
          "も幸あれ(高山惠)\n",
        ],
        [
          "http://news.livedoor.com/article/detail/6915005/",
          Time.iso8601("2012-09-03T14:00:00+0900"),
          "男女間で“カワイイ”",
          "ツー/神田はるひ)\n",
        ],
      ]
      assert_equal(expected, digest_records(articles))
    end

    test("smax") do
      articles = Datasets::LivedoorNews.new(type: :smax).to_a
      expected = [
        870,
        [
          "http://news.livedoor.com/article/detail/6507397/",
          Time.iso8601("2012-04-26T16:55:00+0900"),
          "あのアプリもこのアプ",
          "n Twitter\n",
        ],
        [
          "http://news.livedoor.com/article/detail/6919324/",
          Time.iso8601("2012-09-04T16:55:00+0900"),
          "【究極にカスタマイズ",
          "個人) : 富士通\n",
        ],
      ]
      assert_equal(expected, digest_records(articles))
    end

    test("livedoor_homme") do
      articles = Datasets::LivedoorNews.new(type: :livedoor_homme).to_a
      expected = [
        511,
        [
          "http://news.livedoor.com/article/detail/4568088/",
          Time.iso8601("2010-01-24T18:10:00+0900"),
          "フォーエバー21旗艦",
          "フォーエバー21」\n",
        ],
        [
          "http://news.livedoor.com/article/detail/6828491/",
          Time.iso8601("2012-08-06T14:30:00+0900"),
          "【女子座談会】ぶっち",
          "タートキャンペーン\n",
        ],
      ]
      assert_equal(expected, digest_records(articles))
    end

    test("peachy") do
      articles = Datasets::LivedoorNews.new(type: :peachy).to_a
      expected = [
        842,
        [
          "http://news.livedoor.com/article/detail/4289213/",
          Time.iso8601("2009-08-07T20:30:00+0900"),
          "韓国の伝統菓子を食べ",
          "試してみませんか?\n",
        ],
        [
          "http://news.livedoor.com/article/detail/6908055/",
          Time.iso8601("2012-09-01T18:00:00+0900"),
          "初デートで彼を悶絶さ",
          "hyアプリの使い方\n",
        ],
      ]
      assert_equal(expected, digest_records(articles))
    end

    test("invalid") do
      listed_types = [
        ":topic_news",
        ":sports_watch",
        ":it_life_hack",
        ":kaden_channel",
        ":movie_enter",
        ":dokujo_tsushin",
        ":smax",
        ":livedoor_homme",
        ":peachy",
      ]
      expected_message =
        ":type must be one of [#{listed_types.join(", ")}]: :invalid"
      assert_raise(ArgumentError.new(expected_message)) do
        Datasets::LivedoorNews.new(type: :invalid)
      end
    end
  end

  sub_test_case("#metadata") do
    test("#description") do
      news = Datasets::LivedoorNews.new(type: :topic_news)
      description = news.metadata.description
      expected = [
        "livedoor ニ",
        "に感謝いたします。\n",
      ]
      actual = [
        description[0, 10],
        description[-10, 10],
      ]
      # The third argument is the failure message: the whole description is
      # handed to assert_equal so it is available when the comparison fails.
      assert_equal(expected, actual, description)
    end
  end

  private

  # Builds [record count, digest of the first record, digest of the last
  # record] for comparison in a single assertion.
  def digest_records(records)
    [
      records.size,
      digest_record(records[0]),
      digest_record(records[-1]),
    ]
  end

  # Reduces one record to its URL, timestamp, and the first and last
  # 10 characters of its sentence.
  def digest_record(record)
    [
      record.url,
      record.timestamp,
      record.sentence[0, 10],
      record.sentence[-10, 10],
    ]
  end
end
@@ -0,0 +1,36 @@
# Checks how Datasets::Metadata#licenses= handles the value forms used below:
# a String, an Array of String, a {name:, url:} Hash, and a Symbol (rejected).
class MetadataTest < Test::Unit::TestCase
  def setup
    @metadata = Datasets::Metadata.new
  end

  sub_test_case("#licenses") do
    test("String") do
      @metadata.licenses = "Apache-2.0"
      expected = [Datasets::License.new("Apache-2.0")]
      assert_equal(expected, @metadata.licenses)
    end

    test("[String]") do
      @metadata.licenses = ["Apache-2.0"]
      expected = [Datasets::License.new("Apache-2.0")]
      assert_equal(expected, @metadata.licenses)
    end

    test("{name:, url:}") do
      @metadata.licenses = {
        name: "Quora's Terms of Service",
        url: "https://www.quora.com/about/tos",
      }
      # No SPDX ID is given for this license, hence the leading nil.
      quora_terms = Datasets::License.new(nil,
                                          "Quora's Terms of Service",
                                          "https://www.quora.com/about/tos")
      assert_equal([quora_terms], @metadata.licenses)
    end

    test("Symbol") do
      expected_error = ArgumentError.new("invalid license: :apache_2_0")
      assert_raise(expected_error) do
        @metadata.licenses = :apache_2_0
      end
    end
  end
end
@@ -0,0 +1,132 @@
# Tests for Datasets::NagoyaUniversityConversationCorpus: the sentences,
# participants and remaining fields of the first and last records, plus the
# metadata description.
class NagoyaUniversityConversationCorpusTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::NagoyaUniversityConversationCorpus.new
  end

  sub_test_case("each") do
    test("#sentences") do
      conversations = @dataset.each.to_a
      expected = [
        856,
        {
          participant_id: 'F107',
          content: '***の町というのはちいちゃくって、城壁がこう町全体をぐるっと回ってて、それが城壁の上を歩いても1時間ぐらいですよね。'
        },
        {
          participant_id: nil,
          content: nil
        },
        603,
        {
          participant_id: 'F007',
          content: 'それでは話を始めまーす。'
        },
        {
          participant_id: nil,
          content: nil
        },
      ]
      actual = size_and_ends(conversations[0].sentences) +
               size_and_ends(conversations[-1].sentences)
      assert_equal(expected, actual)
    end

    test("#participants") do
      conversations = @dataset.each.to_a
      expected = [
        4,
        {
          id: 'F107',
          attribute: '女性30代後半',
          birthplace: '愛知県幡豆郡出身',
          residence: '愛知県幡豆郡在住'
        },
        {
          id: 'F128',
          attribute: '女性20代前半',
          birthplace: '愛知県西尾市出身',
          residence: '西尾市在住'
        },
        2,
        {
          id: 'F007',
          attribute: '女性50代後半',
          birthplace: '東京都出身',
          residence: '東京都国分寺市在住'
        },
        {
          id: 'F003',
          attribute: '女性80代後半',
          birthplace: '栃木県宇都宮市出身',
          residence: '国分寺市在住'
        },
      ]
      actual = size_and_ends(conversations[0].participants) +
               size_and_ends(conversations[-1].participants)
      assert_equal(expected, actual)
    end

    test("others") do
      conversations = @dataset.each.to_a
      expected = [
        129,
        [
          '1(約35分)',
          '2001年10月16日',
          'ファミリーレストラン',
          '英会話教室の友人',
          nil
        ],
        [
          '129(36分)',
          '2003年2月16日',
          '二人の自宅',
          '母と娘',
          'F007は東京に38年、F003は東京に60年居住。'
        ],
      ]
      actual = [
        conversations.size,
        scalar_fields(conversations[0]),
        scalar_fields(conversations[-1]),
      ]
      assert_equal(expected, actual)
    end
  end

  sub_test_case("#metadata") do
    test("#description") do
      expected = <<~DESCRIPTION
        The "Nagoya University Conversation Corpus" is a corpus of 129 conversations,
        total about 100 hours of chatting among native speakers of Japanese,
        which is converted into text.
      DESCRIPTION
      assert_equal(expected, @dataset.metadata.description)
    end
  end

  private

  # Returns [size, first element as a Hash, last element as a Hash] for a
  # collection whose elements respond to #to_h.
  def size_and_ends(items)
    [
      items.size,
      items[0].to_h,
      items[-1].to_h,
    ]
  end

  # Collects the non-collection fields of one record in a fixed order.
  def scalar_fields(record)
    [
      record.name,
      record.date,
      record.place,
      record.relationships,
      record.note,
    ]
  end
end
@@ -7,7 +7,7 @@ class PenguinsTest < Test::Unit::TestCase
7
7
  dataset = cls.new
8
8
  dataset.data_path.relative_path_from(dataset.send(:cache_dir_path)).to_s
9
9
  }
10
- assert_equal(["penguins/adelie.csv", "penguins/gentoo.csv", "penguins/chinstrap.csv"],
10
+ assert_equal(["adelie.csv", "gentoo.csv", "chinstrap.csv"],
11
11
  data_paths)
12
12
  end
13
13
  end