red-datasets 0.1.6 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/Rakefile +10 -0
- data/doc/text/news.md +36 -0
- data/lib/datasets/california-housing.rb +1 -1
- data/lib/datasets/dataset.rb +2 -2
- data/lib/datasets/downloader.rb +34 -16
- data/lib/datasets/fashion-mnist.rb +6 -2
- data/lib/datasets/ggplot2-dataset.rb +3 -3
- data/lib/datasets/house-of-councillor.rb +169 -0
- data/lib/datasets/house-of-representative.rb +107 -0
- data/lib/datasets/japanese-date-parser.rb +38 -0
- data/lib/datasets/kuzushiji-mnist.rb +6 -2
- data/lib/datasets/lazy.rb +2 -0
- data/lib/datasets/libsvm-dataset-list.rb +1 -1
- data/lib/datasets/mnist.rb +12 -6
- data/lib/datasets/nagoya-university-conversation-corpus.rb +6 -6
- data/lib/datasets/postal-code-japan.rb +3 -3
- data/lib/datasets/quora-duplicate-question-pair.rb +1 -1
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets/wikipedia-kyoto-japanese-english.rb +2 -2
- data/lib/datasets/wikipedia.rb +2 -2
- data/test/japanese-date-parser-test.rb +27 -0
- data/test/test-adult.rb +36 -86
- data/test/test-aozora-bunko.rb +5 -5
- data/test/test-california-housing.rb +12 -31
- data/test/test-cldr-plurals.rb +1 -1
- data/test/test-diamonds.rb +13 -33
- data/test/test-downloader.rb +1 -1
- data/test/test-geolonia.rb +17 -41
- data/test/test-house-of-councillor.rb +223 -0
- data/test/test-house-of-representative.rb +54 -0
- data/test/test-nagoya-university-conversation-corpus.rb +17 -69
- data/test/test-postal-code-japan.rb +7 -0
- data/test/test-quora-duplicate-question-pair.rb +7 -21
- data/test/test-rdataset.rb +24 -22
- data/test/test-sudachi-synonym-dictionary.rb +12 -31
- data/test/test-wikipedia.rb +5 -5
- metadata +12 -6
@@ -0,0 +1,223 @@
|
|
1
|
+
class HouseOfCouncillorTest < Test::Unit::TestCase
|
2
|
+
test("invalid") do
|
3
|
+
message = ":type must be one of [:bill, :in_house_group, :member, :question]: :invalid"
|
4
|
+
assert_raise(ArgumentError.new(message)) do
|
5
|
+
Datasets::HouseOfCouncillor.new(type: :invalid)
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
sub_test_case(":bill") do
|
10
|
+
def setup
|
11
|
+
@dataset = Datasets::HouseOfCouncillor.new
|
12
|
+
end
|
13
|
+
|
14
|
+
def record(*args)
|
15
|
+
Datasets::HouseOfCouncillor::Bill.new(*args)
|
16
|
+
end
|
17
|
+
|
18
|
+
test("#each") do
|
19
|
+
assert_equal(record(153,
|
20
|
+
"法律案(内閣提出)",
|
21
|
+
153,
|
22
|
+
1,
|
23
|
+
"司法制度改革推進法案",
|
24
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/gian/153/meisai/m15303153001.htm",
|
25
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/gian/153/pdf/5315310.pdf",
|
26
|
+
nil,
|
27
|
+
Date.parse("2001-09-28"),
|
28
|
+
Date.parse("2001-10-30"),
|
29
|
+
nil,
|
30
|
+
"衆先議",
|
31
|
+
nil,
|
32
|
+
nil,
|
33
|
+
nil,
|
34
|
+
nil,
|
35
|
+
Date.parse("2001-10-31"),
|
36
|
+
"法務委員会",
|
37
|
+
Date.parse("2001-11-08"),
|
38
|
+
"可決",
|
39
|
+
Date.parse("2001-11-09"),
|
40
|
+
"可決",
|
41
|
+
nil,
|
42
|
+
"多数",
|
43
|
+
"押しボタン",
|
44
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/vote/153/153-1109-v005.htm",
|
45
|
+
Date.parse("2001-10-18"),
|
46
|
+
"法務委員会",
|
47
|
+
Date.parse("2001-10-26"),
|
48
|
+
"可決",
|
49
|
+
Date.parse("2001-10-30"),
|
50
|
+
"可決",
|
51
|
+
nil,
|
52
|
+
"多数",
|
53
|
+
"起立",
|
54
|
+
Date.parse("2001-11-16"),
|
55
|
+
119,
|
56
|
+
nil,
|
57
|
+
nil),
|
58
|
+
@dataset.each.next)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
sub_test_case(":in_house_group") do
|
63
|
+
def setup
|
64
|
+
@dataset = Datasets::HouseOfCouncillor.new(type: :in_house_group)
|
65
|
+
end
|
66
|
+
|
67
|
+
def record(*args)
|
68
|
+
Datasets::HouseOfCouncillor::InHouseGroup.new(*args)
|
69
|
+
end
|
70
|
+
|
71
|
+
test("#each") do
|
72
|
+
records = @dataset.each.to_a
|
73
|
+
assert_equal([
|
74
|
+
10,
|
75
|
+
record(Date.parse("2024-01-26"),
|
76
|
+
"自由民主党",
|
77
|
+
"自民",
|
78
|
+
Date.parse("2024-08-30"),
|
79
|
+
114,
|
80
|
+
23,
|
81
|
+
Date.parse("2025-07-28"),
|
82
|
+
19,
|
83
|
+
5,
|
84
|
+
33,
|
85
|
+
6,
|
86
|
+
52,
|
87
|
+
11,
|
88
|
+
Date.parse("2028-07-25"),
|
89
|
+
18,
|
90
|
+
5,
|
91
|
+
44,
|
92
|
+
7,
|
93
|
+
62,
|
94
|
+
12),
|
95
|
+
record(Date.parse("2024-01-26"),
|
96
|
+
"各派に属しない議員",
|
97
|
+
"無所属",
|
98
|
+
Date.parse("2024-08-30"),
|
99
|
+
12,
|
100
|
+
4,
|
101
|
+
Date.parse("2025-07-28"),
|
102
|
+
1,
|
103
|
+
0,
|
104
|
+
7,
|
105
|
+
2,
|
106
|
+
8,
|
107
|
+
2,
|
108
|
+
Date.parse("2028-07-25"),
|
109
|
+
1,
|
110
|
+
0,
|
111
|
+
3,
|
112
|
+
2,
|
113
|
+
4,
|
114
|
+
2),
|
115
|
+
],
|
116
|
+
[
|
117
|
+
records.size,
|
118
|
+
records.first,
|
119
|
+
records.last,
|
120
|
+
])
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
sub_test_case(":member") do
|
125
|
+
def setup
|
126
|
+
@dataset = Datasets::HouseOfCouncillor.new(type: :member)
|
127
|
+
end
|
128
|
+
|
129
|
+
def record(*args)
|
130
|
+
Datasets::HouseOfCouncillor::Member.new(*args)
|
131
|
+
end
|
132
|
+
|
133
|
+
test("#each") do
|
134
|
+
records = @dataset.each.to_a
|
135
|
+
assert_equal([
|
136
|
+
245,
|
137
|
+
record("足立 敏之",
|
138
|
+
nil,
|
139
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/profile/7016001.htm",
|
140
|
+
"あだち としゆき",
|
141
|
+
"自民",
|
142
|
+
"比例",
|
143
|
+
Date.parse("2028-07-25"),
|
144
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/g7016001.jpg",
|
145
|
+
[2016, 2022],
|
146
|
+
2,
|
147
|
+
"財政金融委員会(長)",
|
148
|
+
Date.parse("2024-08-30"),
|
149
|
+
"昭和29年5月20日兵庫県西宮市生まれ。(本籍地・京都府福知山市)昭和48年和歌山県立桐蔭高等学校卒業、昭和52年京都大学工学部土木工学科卒業、昭和54年京都大学大学院工学研究科修士課程修了、同年建設省入省後、兵庫県庁、東北及び関東地方整備局、河川局河川計画課河川事業調整官、内閣官房(安全保障・危機管理担当)等を経て、平成15年近畿地方整備局企画部長、平成18年河川局河川計画課長、平成21年四国地方整備局長、平成23年中部地方整備局長、平成24年水管理・国土保全局長、平成25年技監、平成26年国土交通省を退職。平成28年第24回参議院議員通常選挙で初当選○参議院予算委員会理事、災害対策特別委員会理事○著書「激甚化する水害」「いいね!建設産業本当の魅力」(日経BP社)",
|
150
|
+
Date.parse("2022-11-30")),
|
151
|
+
record("渡辺 猛之",
|
152
|
+
nil,
|
153
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/profile/7010055.htm",
|
154
|
+
"わたなべ たけゆき",
|
155
|
+
"自民",
|
156
|
+
"岐阜",
|
157
|
+
Date.parse("2028-07-25"),
|
158
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/g7010055.jpg",
|
159
|
+
[2010, 2016, 2022],
|
160
|
+
3,
|
161
|
+
"経済産業委員会、議院運営委員会(理)",
|
162
|
+
Date.parse("2024-08-30"),
|
163
|
+
"昭和43年4月18日生、岐阜県加茂郡八百津町出身。岐阜県立加茂高等学校、名古屋大学経済学部卒業。平成4年、財団法人松下政経塾入塾(第13期生)。平成7年、同塾卒業後、26歳で岐阜県議会議員に初当選。以後通算4期当選。在任中は、自民党岐阜県連副幹事長、岐阜県商工会青年部連合会会長、岐阜県商工政治連盟会長、県監査委員、県政自民クラブ幹事長を歴任。平成22年7月、参議院議員初当選○農林水産委員長、政治倫理の確立及び選挙制度に関する特別委員長、参議院自民党筆頭副幹事長、国土交通副大臣兼内閣府副大臣兼復興副大臣を歴任○現在議院運営委員会筆頭理事。環境委員",
|
164
|
+
Date.parse("2022-11-30")),
|
165
|
+
],
|
166
|
+
[
|
167
|
+
records.size,
|
168
|
+
records.first,
|
169
|
+
records.last,
|
170
|
+
])
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
sub_test_case(":question") do
|
175
|
+
def setup
|
176
|
+
@dataset = Datasets::HouseOfCouncillor.new(type: :question)
|
177
|
+
end
|
178
|
+
|
179
|
+
def record(*args)
|
180
|
+
Datasets::HouseOfCouncillor::Question.new(*args)
|
181
|
+
end
|
182
|
+
|
183
|
+
test("#each") do
|
184
|
+
records = @dataset.each.to_a
|
185
|
+
assert_equal([
|
186
|
+
7833,
|
187
|
+
record(1,
|
188
|
+
1,
|
189
|
+
"食生活安定に関する質問主意書",
|
190
|
+
"市来 乙彦",
|
191
|
+
1,
|
192
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/001/syuh/s001001.htm",
|
193
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/001/touh/t001001.htm",
|
194
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/001/syup/s001001.pdf",
|
195
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/001/toup/t001001.pdf",
|
196
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/001/meisai/m001001.htm",
|
197
|
+
Date.parse("1947-06-06"),
|
198
|
+
Date.parse("1947-06-23"),
|
199
|
+
Date.parse("1947-06-28"),
|
200
|
+
nil),
|
201
|
+
record(213,
|
202
|
+
239,
|
203
|
+
"地方自治法第二百五十二条の二十六の五に定める各大臣の「生命等の保護の措置に関する指示」と地方自治の本旨等との関係に関する質問主意書",
|
204
|
+
"小西 洋之",
|
205
|
+
1,
|
206
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/213/syuh/s213239.htm",
|
207
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/213/touh/t213239.htm",
|
208
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/213/syup/s213239.pdf",
|
209
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/213/toup/t213239.pdf",
|
210
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/213/meisai/m213239.htm",
|
211
|
+
Date.parse("2024-06-21"),
|
212
|
+
Date.parse("2024-06-21"),
|
213
|
+
Date.parse("2024-07-02"),
|
214
|
+
"6月25日内閣から通知書受領(7月2日まで答弁延期)"),
|
215
|
+
],
|
216
|
+
[
|
217
|
+
records.size,
|
218
|
+
records.first,
|
219
|
+
records.last,
|
220
|
+
])
|
221
|
+
end
|
222
|
+
end
|
223
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
class HouseOfRepresentativeTest < Test::Unit::TestCase
|
2
|
+
def setup
|
3
|
+
@dataset = Datasets::HouseOfRepresentative.new
|
4
|
+
end
|
5
|
+
|
6
|
+
def record(*args)
|
7
|
+
Datasets::HouseOfRepresentative::Record.new(*args)
|
8
|
+
end
|
9
|
+
|
10
|
+
test("#each") do
|
11
|
+
assert_equal(record(142,
|
12
|
+
"衆法の一覧",
|
13
|
+
nil,
|
14
|
+
139,
|
15
|
+
18,
|
16
|
+
"市民活動促進法案",
|
17
|
+
"成立",
|
18
|
+
"経過",
|
19
|
+
"https://www.shugiin.go.jp/internet/itdb_gian.nsf/html/gian/keika/5516.htm",
|
20
|
+
nil,
|
21
|
+
nil,
|
22
|
+
"衆法",
|
23
|
+
"熊代 昭彦君外四名",
|
24
|
+
%w(自由民主党 社会民主党・市民連合 新党さきがけ),
|
25
|
+
nil,
|
26
|
+
nil,
|
27
|
+
nil,
|
28
|
+
Date.jisx0301("H10.03.04"),
|
29
|
+
Date.jisx0301("H10.03.11"),
|
30
|
+
"内閣",
|
31
|
+
Date.jisx0301("H10.03.17"),
|
32
|
+
"可決",
|
33
|
+
Date.jisx0301("H10.03.19"),
|
34
|
+
"可決",
|
35
|
+
nil,
|
36
|
+
nil,
|
37
|
+
nil,
|
38
|
+
nil,
|
39
|
+
nil,
|
40
|
+
nil,
|
41
|
+
nil,
|
42
|
+
Date.jisx0301("H10.01.12"),
|
43
|
+
"労働・社会政策",
|
44
|
+
Date.jisx0301("H10.03.03"),
|
45
|
+
"修正",
|
46
|
+
Date.jisx0301("H10.03.04"),
|
47
|
+
"修正",
|
48
|
+
Date.jisx0301("H10.03.25"),
|
49
|
+
7,
|
50
|
+
nil,
|
51
|
+
nil),
|
52
|
+
@dataset.each.next)
|
53
|
+
end
|
54
|
+
end
|
@@ -5,116 +5,64 @@ class NagoyaUniversityConversationCorpusTest < Test::Unit::TestCase
|
|
5
5
|
|
6
6
|
sub_test_case("each") do
|
7
7
|
test("#sentences") do
|
8
|
-
|
9
|
-
first_sentences = records[0].sentences
|
10
|
-
last_sentences = records[-1].sentences
|
8
|
+
first_sentences = @dataset.each.next.sentences
|
11
9
|
assert_equal([
|
12
10
|
856,
|
13
11
|
{
|
14
12
|
participant_id: 'F107',
|
15
|
-
content: '***の町というのはちいちゃくって、城壁がこう町全体をぐるっと回ってて、それが城壁の上を歩いても1時間ぐらいですよね。'
|
13
|
+
content: '***の町というのはちいちゃくって、城壁がこう町全体をぐるっと回ってて、それが城壁の上を歩いても1時間ぐらいですよね。',
|
16
14
|
},
|
17
15
|
{
|
18
16
|
participant_id: nil,
|
19
|
-
content: nil
|
17
|
+
content: nil,
|
20
18
|
},
|
21
|
-
603,
|
22
|
-
{
|
23
|
-
participant_id: 'F007',
|
24
|
-
content: 'それでは話を始めまーす。'
|
25
|
-
},
|
26
|
-
{
|
27
|
-
participant_id: nil,
|
28
|
-
content: nil
|
29
|
-
}
|
30
19
|
],
|
31
20
|
[
|
32
21
|
first_sentences.size,
|
33
22
|
first_sentences[0].to_h,
|
34
23
|
first_sentences[-1].to_h,
|
35
|
-
last_sentences.size,
|
36
|
-
last_sentences[0].to_h,
|
37
|
-
last_sentences[-1].to_h,
|
38
24
|
])
|
39
25
|
end
|
40
26
|
|
41
27
|
test("#participants") do
|
42
|
-
|
43
|
-
first_participants = records[0].participants
|
44
|
-
last_participants = records[-1].participants
|
28
|
+
first_participants = @dataset.each.next.participants
|
45
29
|
assert_equal([
|
46
30
|
4,
|
47
31
|
{
|
48
32
|
id: 'F107',
|
49
33
|
attribute: '女性30代後半',
|
50
34
|
birthplace: '愛知県幡豆郡出身',
|
51
|
-
residence: '愛知県幡豆郡在住'
|
35
|
+
residence: '愛知県幡豆郡在住',
|
52
36
|
},
|
53
37
|
{
|
54
38
|
id: 'F128',
|
55
39
|
attribute: '女性20代前半',
|
56
40
|
birthplace: '愛知県西尾市出身',
|
57
|
-
residence: '西尾市在住'
|
41
|
+
residence: '西尾市在住',
|
58
42
|
},
|
59
|
-
2,
|
60
|
-
{
|
61
|
-
id: 'F007',
|
62
|
-
attribute: '女性50代後半',
|
63
|
-
birthplace: '東京都出身',
|
64
|
-
residence: '東京都国分寺市在住'
|
65
|
-
},
|
66
|
-
{
|
67
|
-
id: 'F003',
|
68
|
-
attribute: '女性80代後半',
|
69
|
-
birthplace: '栃木県宇都宮市出身',
|
70
|
-
residence: '国分寺市在住'
|
71
|
-
}
|
72
43
|
],
|
73
44
|
[
|
74
45
|
first_participants.size,
|
75
46
|
first_participants[0].to_h,
|
76
47
|
first_participants[-1].to_h,
|
77
|
-
last_participants.size,
|
78
|
-
last_participants[0].to_h,
|
79
|
-
last_participants[-1].to_h
|
80
48
|
])
|
81
49
|
end
|
82
50
|
|
83
51
|
test("others") do
|
84
|
-
|
52
|
+
first_record = @dataset.each.next
|
85
53
|
assert_equal([
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
'英会話教室の友人',
|
92
|
-
nil
|
93
|
-
],
|
94
|
-
[
|
95
|
-
'129(36分)',
|
96
|
-
'2003年2月16日',
|
97
|
-
'二人の自宅',
|
98
|
-
'母と娘',
|
99
|
-
'F007は東京に38年、F003は東京に60年居住。'
|
100
|
-
]
|
54
|
+
'1(約35分)',
|
55
|
+
'2001年10月16日',
|
56
|
+
'ファミリーレストラン',
|
57
|
+
'英会話教室の友人',
|
58
|
+
nil,
|
101
59
|
],
|
102
60
|
[
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
records[0].relationships,
|
109
|
-
records[0].note
|
110
|
-
],
|
111
|
-
[
|
112
|
-
records[-1].name,
|
113
|
-
records[-1].date,
|
114
|
-
records[-1].place,
|
115
|
-
records[-1].relationships,
|
116
|
-
records[-1].note
|
117
|
-
]
|
61
|
+
first_record.name,
|
62
|
+
first_record.date,
|
63
|
+
first_record.place,
|
64
|
+
first_record.relationships,
|
65
|
+
first_record.note,
|
118
66
|
])
|
119
67
|
end
|
120
68
|
end
|
@@ -1,4 +1,11 @@
|
|
1
1
|
class PostalCodeJapanTest < Test::Unit::TestCase
|
2
|
+
test("invalid") do
|
3
|
+
message = ":reading must be one of [:lowercase, :uppercase, :romaji]: :invalid"
|
4
|
+
assert_raise(ArgumentError.new(message)) do
|
5
|
+
Datasets::PostalCodeJapan.new(reading: :invalid)
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
2
9
|
sub_test_case(":reading") do
|
3
10
|
test(":lowercase") do
|
4
11
|
dataset = Datasets::PostalCodeJapan.new(reading: :lowercase)
|
@@ -8,26 +8,12 @@ class QuoraDuplicateQuestionPairTest < Test::Unit::TestCase
|
|
8
8
|
end
|
9
9
|
|
10
10
|
test("#each") do
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
"What is the step by step guide to invest in share market?",
|
19
|
-
false),
|
20
|
-
record(404289,
|
21
|
-
537932,
|
22
|
-
537933,
|
23
|
-
"What is like to have sex with cousin?",
|
24
|
-
"What is it like to have sex with your cousin?",
|
25
|
-
false),
|
26
|
-
],
|
27
|
-
[
|
28
|
-
records.size,
|
29
|
-
records.first,
|
30
|
-
records.last,
|
31
|
-
])
|
11
|
+
assert_equal(record(0,
|
12
|
+
1,
|
13
|
+
2,
|
14
|
+
"What is the step by step guide to invest in share market in india?",
|
15
|
+
"What is the step by step guide to invest in share market?",
|
16
|
+
false),
|
17
|
+
@dataset.each.next)
|
32
18
|
end
|
33
19
|
end
|
data/test/test-rdataset.rb
CHANGED
@@ -8,7 +8,7 @@ class RdatasetTest < Test::Unit::TestCase
|
|
8
8
|
test("with package_name") do
|
9
9
|
records = @dataset.filter(package: "datasets").to_a
|
10
10
|
assert_equal([
|
11
|
-
|
11
|
+
104,
|
12
12
|
{
|
13
13
|
package: "datasets",
|
14
14
|
dataset: "ability.cov",
|
@@ -48,7 +48,7 @@ class RdatasetTest < Test::Unit::TestCase
|
|
48
48
|
test("without package_name") do
|
49
49
|
records = @dataset.each.to_a
|
50
50
|
assert_equal([
|
51
|
-
|
51
|
+
2337,
|
52
52
|
{
|
53
53
|
package: "AER",
|
54
54
|
dataset: "Affairs",
|
@@ -110,8 +110,8 @@ class RdatasetTest < Test::Unit::TestCase
|
|
110
110
|
records = @dataset.each.to_a
|
111
111
|
assert_equal([
|
112
112
|
144,
|
113
|
-
{ time: 1949, value: 112 },
|
114
|
-
{ time: 1960.91666666667, value: 432 },
|
113
|
+
{ time: 1949, value: 112, rownames: 1 },
|
114
|
+
{ time: 1960.91666666667, value: 432, rownames: 144 },
|
115
115
|
],
|
116
116
|
[
|
117
117
|
records.size,
|
@@ -141,8 +141,8 @@ class RdatasetTest < Test::Unit::TestCase
|
|
141
141
|
records = @dataset.each.to_a
|
142
142
|
assert_equal([
|
143
143
|
153,
|
144
|
-
{ Ozone: nil, "Solar.R": nil, Wind: 14.3, Temp: 56, Month: 5, Day: 5 },
|
145
|
-
{ Ozone: 20, "Solar.R": 223, Wind: 11.5, Temp: 68, Month: 9, Day: 30 },
|
144
|
+
{ Ozone: nil, "Solar.R": nil, Wind: 14.3, Temp: 56, Month: 5, Day: 5, rownames: 5 },
|
145
|
+
{ Ozone: 20, "Solar.R": 223, Wind: 11.5, Temp: 68, Month: 9, Day: 30, rownames: 153 },
|
146
146
|
],
|
147
147
|
[
|
148
148
|
records.size,
|
@@ -161,10 +161,10 @@ class RdatasetTest < Test::Unit::TestCase
|
|
161
161
|
records = @dataset.each.to_a
|
162
162
|
assert_equal([
|
163
163
|
182,
|
164
|
-
{ event: 1, mag: 7, station:
|
165
|
-
{ event: 16, mag: 5.1, station: nil, dist: 7.6, accel: 0.28 },
|
166
|
-
{ event: 23, mag: 5.3, station: "c168", dist: 25.3, accel: 0.23 },
|
167
|
-
{ event: 23, mag: 5.3, station:
|
164
|
+
{ event: 1, mag: 7, station: 117, dist: 12, accel: 0.359, rownames: 1 },
|
165
|
+
{ event: 16, mag: 5.1, station: nil, dist: 7.6, accel: 0.28, rownames: 79 },
|
166
|
+
{ event: 23, mag: 5.3, station: "c168", dist: 25.3, accel: 0.23, rownames: 170 },
|
167
|
+
{ event: 23, mag: 5.3, station: 5072, dist: 53.1, accel: 0.022, rownames: 182 }
|
168
168
|
],
|
169
169
|
[
|
170
170
|
records.size,
|
@@ -187,8 +187,8 @@ class RdatasetTest < Test::Unit::TestCase
|
|
187
187
|
records = @dataset.each.to_a
|
188
188
|
assert_equal([
|
189
189
|
192,
|
190
|
-
{ temp: 10, species: 'wheat', start: 0, end: 1
|
191
|
-
{ temp: 40, species: 'rice', start: 18, end: Float::INFINITY, germinated: 12 }
|
190
|
+
{ temp: 10, species: 'wheat', start: 0, end: 1, germinated: 0, rownames: 1 },
|
191
|
+
{ temp: 40, species: 'rice', start: 18, end: Float::INFINITY, germinated: 12, rownames: 192 }
|
192
192
|
],
|
193
193
|
[
|
194
194
|
records.size,
|
@@ -213,25 +213,27 @@ class RdatasetTest < Test::Unit::TestCase
|
|
213
213
|
Order: 398_481,
|
214
214
|
Level: 1,
|
215
215
|
Code: 'A',
|
216
|
-
Parent:
|
216
|
+
Parent: nil,
|
217
217
|
Description: 'AGRICULTURE, FORESTRY AND FISHING',
|
218
218
|
This_item_includes: 'This section includes the exploitation of vegetal and animal natural resources, comprising the activities of growing of crops, raising and breeding of animals, harvesting of timber and other plants, animals or animal products from a farm or their natural habitats.',
|
219
|
-
This_item_also_includes:
|
220
|
-
Rulings:
|
221
|
-
This_item_excludes:
|
222
|
-
"Reference_to_ISIC_Rev._4": 'A'
|
219
|
+
This_item_also_includes: nil,
|
220
|
+
Rulings: nil,
|
221
|
+
This_item_excludes: nil,
|
222
|
+
"Reference_to_ISIC_Rev._4": 'A',
|
223
|
+
rownames: 1
|
223
224
|
},
|
224
225
|
{
|
225
226
|
Order: 399_476,
|
226
227
|
Level: 4,
|
227
|
-
Code:
|
228
|
-
Parent:
|
228
|
+
Code: 99.0,
|
229
|
+
Parent: 99.0,
|
229
230
|
Description: 'Activities of extraterritorial organisations and bodies',
|
230
231
|
This_item_includes: "This class includes:\n- activities of international organisations such as the United Nations and the specialised agencies of the United Nations system, regional bodies etc., the International Monetary Fund, the World Bank, the World Customs Organisation, the Organisation for Economic Co-operation and Development, the organisation of Petroleum Exporting Countries, the European Communities, the European Free Trade Association etc.",
|
231
232
|
This_item_also_includes: "This class also includes:\n- activities of diplomatic and consular missions when being determined by the country of their location rather than by the country they represent",
|
232
|
-
Rulings:
|
233
|
-
This_item_excludes:
|
234
|
-
"Reference_to_ISIC_Rev._4":
|
233
|
+
Rulings: nil,
|
234
|
+
This_item_excludes: nil,
|
235
|
+
"Reference_to_ISIC_Rev._4": 9900,
|
236
|
+
rownames: 996
|
235
237
|
}
|
236
238
|
],
|
237
239
|
[
|
@@ -4,37 +4,18 @@ class SudachiSynonymDictionaryTest < Test::Unit::TestCase
|
|
4
4
|
end
|
5
5
|
|
6
6
|
test('#each') do
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
notation: "曖昧",
|
20
|
-
},
|
21
|
-
{
|
22
|
-
group_id: "024916",
|
23
|
-
is_noun: true,
|
24
|
-
expansion_type: :expanded,
|
25
|
-
lexeme_id: 1,
|
26
|
-
form_type: :typical,
|
27
|
-
acronym_type: :alphabet,
|
28
|
-
variant_type: :typical,
|
29
|
-
categories: ["ビジネス"],
|
30
|
-
notation: "SCM",
|
31
|
-
},
|
32
|
-
],
|
33
|
-
[
|
34
|
-
records.size,
|
35
|
-
records[0].to_h,
|
36
|
-
records[-1].to_h,
|
37
|
-
])
|
7
|
+
assert_equal({
|
8
|
+
group_id: "000001",
|
9
|
+
is_noun: true,
|
10
|
+
expansion_type: :always,
|
11
|
+
lexeme_id: 1,
|
12
|
+
form_type: :typical,
|
13
|
+
acronym_type: :typical,
|
14
|
+
variant_type: :typical,
|
15
|
+
categories: [],
|
16
|
+
notation: "曖昧",
|
17
|
+
},
|
18
|
+
@dataset.each.next.to_h)
|
38
19
|
end
|
39
20
|
|
40
21
|
sub_test_case('#metadata') do
|
data/test/test-wikipedia.rb
CHANGED
@@ -7,13 +7,13 @@ class WikipediaTest < Test::Unit::TestCase
|
|
7
7
|
end
|
8
8
|
|
9
9
|
test("#each") do
|
10
|
-
contributor = Datasets::Wikipedia::Contributor.new("
|
10
|
+
contributor = Datasets::Wikipedia::Contributor.new("Asparagusus", 43603280)
|
11
11
|
revision = Datasets::Wikipedia::Revision.new
|
12
|
-
revision.id =
|
13
|
-
revision.parent_id =
|
14
|
-
revision.timestamp = Time.iso8601("
|
12
|
+
revision.id = 1219062925
|
13
|
+
revision.parent_id = 1219062840
|
14
|
+
revision.timestamp = Time.iso8601("2024-04-15T14:38:04Z")
|
15
15
|
revision.contributor = contributor
|
16
|
-
revision.comment = "
|
16
|
+
revision.comment = "Restored revision 1002250816 by [[Special:Contributions/Elli|Elli]] ([[User talk:Elli|talk]]): Unexplained redirect breaking"
|
17
17
|
revision.model = "wikitext"
|
18
18
|
revision.format = "text/x-wiki"
|
19
19
|
revision.text = <<-TEXT.chomp
|