red-datasets 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -0
  3. data/Rakefile +10 -0
  4. data/doc/text/news.md +29 -0
  5. data/lib/datasets/california-housing.rb +1 -1
  6. data/lib/datasets/dataset.rb +2 -2
  7. data/lib/datasets/downloader.rb +34 -16
  8. data/lib/datasets/fashion-mnist.rb +6 -2
  9. data/lib/datasets/ggplot2-dataset.rb +3 -3
  10. data/lib/datasets/house-of-councillor.rb +169 -0
  11. data/lib/datasets/house-of-representative.rb +107 -0
  12. data/lib/datasets/japanese-date-parser.rb +38 -0
  13. data/lib/datasets/kuzushiji-mnist.rb +6 -2
  14. data/lib/datasets/lazy.rb +2 -0
  15. data/lib/datasets/libsvm-dataset-list.rb +1 -1
  16. data/lib/datasets/mnist.rb +12 -6
  17. data/lib/datasets/nagoya-university-conversation-corpus.rb +2 -2
  18. data/lib/datasets/postal-code-japan.rb +3 -3
  19. data/lib/datasets/quora-duplicate-question-pair.rb +1 -1
  20. data/lib/datasets/version.rb +1 -1
  21. data/lib/datasets/wikipedia-kyoto-japanese-english.rb +2 -2
  22. data/lib/datasets/wikipedia.rb +2 -2
  23. data/test/japanese-date-parser-test.rb +27 -0
  24. data/test/test-adult.rb +36 -86
  25. data/test/test-aozora-bunko.rb +5 -5
  26. data/test/test-california-housing.rb +12 -31
  27. data/test/test-cldr-plurals.rb +1 -1
  28. data/test/test-diamonds.rb +13 -33
  29. data/test/test-downloader.rb +1 -1
  30. data/test/test-geolonia.rb +17 -41
  31. data/test/test-house-of-councillor.rb +223 -0
  32. data/test/test-house-of-representative.rb +54 -0
  33. data/test/test-nagoya-university-conversation-corpus.rb +17 -69
  34. data/test/test-postal-code-japan.rb +7 -0
  35. data/test/test-quora-duplicate-question-pair.rb +7 -21
  36. data/test/test-rdataset.rb +24 -22
  37. data/test/test-sudachi-synonym-dictionary.rb +12 -31
  38. data/test/test-wikipedia.rb +5 -5
  39. metadata +12 -6
@@ -0,0 +1,54 @@
1
+ class HouseOfRepresentativeTest < Test::Unit::TestCase
2
+ def setup
3
+ @dataset = Datasets::HouseOfRepresentative.new
4
+ end
5
+
6
+ def record(*args)
7
+ Datasets::HouseOfRepresentative::Record.new(*args)
8
+ end
9
+
10
+ test("#each") do
11
+ assert_equal(record(142,
12
+ "衆法の一覧",
13
+ nil,
14
+ 139,
15
+ 18,
16
+ "市民活動促進法案",
17
+ "成立",
18
+ "経過",
19
+ "https://www.shugiin.go.jp/internet/itdb_gian.nsf/html/gian/keika/5516.htm",
20
+ nil,
21
+ nil,
22
+ "衆法",
23
+ "熊代 昭彦君外四名",
24
+ %w(自由民主党 社会民主党・市民連合 新党さきがけ),
25
+ nil,
26
+ nil,
27
+ nil,
28
+ Date.jisx0301("H10.03.04"),
29
+ Date.jisx0301("H10.03.11"),
30
+ "内閣",
31
+ Date.jisx0301("H10.03.17"),
32
+ "可決",
33
+ Date.jisx0301("H10.03.19"),
34
+ "可決",
35
+ nil,
36
+ nil,
37
+ nil,
38
+ nil,
39
+ nil,
40
+ nil,
41
+ nil,
42
+ Date.jisx0301("H10.01.12"),
43
+ "労働・社会政策",
44
+ Date.jisx0301("H10.03.03"),
45
+ "修正",
46
+ Date.jisx0301("H10.03.04"),
47
+ "修正",
48
+ Date.jisx0301("H10.03.25"),
49
+ 7,
50
+ nil,
51
+ nil),
52
+ @dataset.each.next)
53
+ end
54
+ end
@@ -5,116 +5,64 @@ class NagoyaUniversityConversationCorpusTest < Test::Unit::TestCase
5
5
 
6
6
  sub_test_case("each") do
7
7
  test("#sentences") do
8
- records = @dataset.each.to_a
9
- first_sentences = records[0].sentences
10
- last_sentences = records[-1].sentences
8
+ first_sentences = @dataset.each.next.sentences
11
9
  assert_equal([
12
10
  856,
13
11
  {
14
12
  participant_id: 'F107',
15
- content: '***の町というのはちいちゃくって、城壁がこう町全体をぐるっと回ってて、それが城壁の上を歩いても1時間ぐらいですよね。'
13
+ content: '***の町というのはちいちゃくって、城壁がこう町全体をぐるっと回ってて、それが城壁の上を歩いても1時間ぐらいですよね。',
16
14
  },
17
15
  {
18
16
  participant_id: nil,
19
- content: nil
17
+ content: nil,
20
18
  },
21
- 603,
22
- {
23
- participant_id: 'F007',
24
- content: 'それでは話を始めまーす。'
25
- },
26
- {
27
- participant_id: nil,
28
- content: nil
29
- }
30
19
  ],
31
20
  [
32
21
  first_sentences.size,
33
22
  first_sentences[0].to_h,
34
23
  first_sentences[-1].to_h,
35
- last_sentences.size,
36
- last_sentences[0].to_h,
37
- last_sentences[-1].to_h,
38
24
  ])
39
25
  end
40
26
 
41
27
  test("#participants") do
42
- records = @dataset.each.to_a
43
- first_participants = records[0].participants
44
- last_participants = records[-1].participants
28
+ first_participants = @dataset.each.next.participants
45
29
  assert_equal([
46
30
  4,
47
31
  {
48
32
  id: 'F107',
49
33
  attribute: '女性30代後半',
50
34
  birthplace: '愛知県幡豆郡出身',
51
- residence: '愛知県幡豆郡在住'
35
+ residence: '愛知県幡豆郡在住',
52
36
  },
53
37
  {
54
38
  id: 'F128',
55
39
  attribute: '女性20代前半',
56
40
  birthplace: '愛知県西尾市出身',
57
- residence: '西尾市在住'
41
+ residence: '西尾市在住',
58
42
  },
59
- 2,
60
- {
61
- id: 'F007',
62
- attribute: '女性50代後半',
63
- birthplace: '東京都出身',
64
- residence: '東京都国分寺市在住'
65
- },
66
- {
67
- id: 'F003',
68
- attribute: '女性80代後半',
69
- birthplace: '栃木県宇都宮市出身',
70
- residence: '国分寺市在住'
71
- }
72
43
  ],
73
44
  [
74
45
  first_participants.size,
75
46
  first_participants[0].to_h,
76
47
  first_participants[-1].to_h,
77
- last_participants.size,
78
- last_participants[0].to_h,
79
- last_participants[-1].to_h
80
48
  ])
81
49
  end
82
50
 
83
51
  test("others") do
84
- records = @dataset.each.to_a
52
+ first_record = @dataset.each.next
85
53
  assert_equal([
86
- 129,
87
- [
88
- '1(約35分)',
89
- '2001年10月16日',
90
- 'ファミリーレストラン',
91
- '英会話教室の友人',
92
- nil
93
- ],
94
- [
95
- '129(36分)',
96
- '2003年2月16日',
97
- '二人の自宅',
98
- '母と娘',
99
- 'F007は東京に38年、F003は東京に60年居住。'
100
- ]
54
+ '1(約35分)',
55
+ '2001年10月16日',
56
+ 'ファミリーレストラン',
57
+ '英会話教室の友人',
58
+ nil,
101
59
  ],
102
60
  [
103
- records.size,
104
- [
105
- records[0].name,
106
- records[0].date,
107
- records[0].place,
108
- records[0].relationships,
109
- records[0].note
110
- ],
111
- [
112
- records[-1].name,
113
- records[-1].date,
114
- records[-1].place,
115
- records[-1].relationships,
116
- records[-1].note
117
- ]
61
+ first_record.name,
62
+ first_record.date,
63
+ first_record.place,
64
+ first_record.relationships,
65
+ first_record.note,
118
66
  ])
119
67
  end
120
68
  end
@@ -1,4 +1,11 @@
1
1
  class PostalCodeJapanTest < Test::Unit::TestCase
2
+ test("invalid") do
3
+ message = ":reading must be one of [:lowercase, :uppercase, :romaji]: :invalid"
4
+ assert_raise(ArgumentError.new(message)) do
5
+ Datasets::PostalCodeJapan.new(reading: :invalid)
6
+ end
7
+ end
8
+
2
9
  sub_test_case(":reading") do
3
10
  test(":lowercase") do
4
11
  dataset = Datasets::PostalCodeJapan.new(reading: :lowercase)
@@ -8,26 +8,12 @@ class QuoraDuplicateQuestionPairTest < Test::Unit::TestCase
8
8
  end
9
9
 
10
10
  test("#each") do
11
- records = @dataset.each.to_a
12
- assert_equal([
13
- 404290,
14
- record(0,
15
- 1,
16
- 2,
17
- "What is the step by step guide to invest in share market in india?",
18
- "What is the step by step guide to invest in share market?",
19
- false),
20
- record(404289,
21
- 537932,
22
- 537933,
23
- "What is like to have sex with cousin?",
24
- "What is it like to have sex with your cousin?",
25
- false),
26
- ],
27
- [
28
- records.size,
29
- records.first,
30
- records.last,
31
- ])
11
+ assert_equal(record(0,
12
+ 1,
13
+ 2,
14
+ "What is the step by step guide to invest in share market in india?",
15
+ "What is the step by step guide to invest in share market?",
16
+ false),
17
+ @dataset.each.next)
32
18
  end
33
19
  end
@@ -8,7 +8,7 @@ class RdatasetTest < Test::Unit::TestCase
8
8
  test("with package_name") do
9
9
  records = @dataset.filter(package: "datasets").to_a
10
10
  assert_equal([
11
- 102,
11
+ 104,
12
12
  {
13
13
  package: "datasets",
14
14
  dataset: "ability.cov",
@@ -48,7 +48,7 @@ class RdatasetTest < Test::Unit::TestCase
48
48
  test("without package_name") do
49
49
  records = @dataset.each.to_a
50
50
  assert_equal([
51
- 2142,
51
+ 2337,
52
52
  {
53
53
  package: "AER",
54
54
  dataset: "Affairs",
@@ -110,8 +110,8 @@ class RdatasetTest < Test::Unit::TestCase
110
110
  records = @dataset.each.to_a
111
111
  assert_equal([
112
112
  144,
113
- { time: 1949, value: 112 },
114
- { time: 1960.91666666667, value: 432 },
113
+ { time: 1949, value: 112, rownames: 1 },
114
+ { time: 1960.91666666667, value: 432, rownames: 144 },
115
115
  ],
116
116
  [
117
117
  records.size,
@@ -141,8 +141,8 @@ class RdatasetTest < Test::Unit::TestCase
141
141
  records = @dataset.each.to_a
142
142
  assert_equal([
143
143
  153,
144
- { Ozone: nil, "Solar.R": nil, Wind: 14.3, Temp: 56, Month: 5, Day: 5 },
145
- { Ozone: 20, "Solar.R": 223, Wind: 11.5, Temp: 68, Month: 9, Day: 30 },
144
+ { Ozone: nil, "Solar.R": nil, Wind: 14.3, Temp: 56, Month: 5, Day: 5, rownames: 5 },
145
+ { Ozone: 20, "Solar.R": 223, Wind: 11.5, Temp: 68, Month: 9, Day: 30, rownames: 153 },
146
146
  ],
147
147
  [
148
148
  records.size,
@@ -161,10 +161,10 @@ class RdatasetTest < Test::Unit::TestCase
161
161
  records = @dataset.each.to_a
162
162
  assert_equal([
163
163
  182,
164
- { event: 1, mag: 7, station: "117", dist: 12, accel: 0.359 },
165
- { event: 16, mag: 5.1, station: nil, dist: 7.6, accel: 0.28 },
166
- { event: 23, mag: 5.3, station: "c168", dist: 25.3, accel: 0.23 },
167
- { event: 23, mag: 5.3, station: "5072", dist: 53.1, accel: 0.022 }
164
+ { event: 1, mag: 7, station: 117, dist: 12, accel: 0.359, rownames: 1 },
165
+ { event: 16, mag: 5.1, station: nil, dist: 7.6, accel: 0.28, rownames: 79 },
166
+ { event: 23, mag: 5.3, station: "c168", dist: 25.3, accel: 0.23, rownames: 170 },
167
+ { event: 23, mag: 5.3, station: 5072, dist: 53.1, accel: 0.022, rownames: 182 }
168
168
  ],
169
169
  [
170
170
  records.size,
@@ -187,8 +187,8 @@ class RdatasetTest < Test::Unit::TestCase
187
187
  records = @dataset.each.to_a
188
188
  assert_equal([
189
189
  192,
190
- { temp: 10, species: 'wheat', start: 0, end: 1.0, germinated: 0 },
191
- { temp: 40, species: 'rice', start: 18, end: Float::INFINITY, germinated: 12 }
190
+ { temp: 10, species: 'wheat', start: 0, end: 1, germinated: 0, rownames: 1 },
191
+ { temp: 40, species: 'rice', start: 18, end: Float::INFINITY, germinated: 12, rownames: 192 }
192
192
  ],
193
193
  [
194
194
  records.size,
@@ -213,25 +213,27 @@ class RdatasetTest < Test::Unit::TestCase
213
213
  Order: 398_481,
214
214
  Level: 1,
215
215
  Code: 'A',
216
- Parent: '',
216
+ Parent: nil,
217
217
  Description: 'AGRICULTURE, FORESTRY AND FISHING',
218
218
  This_item_includes: 'This section includes the exploitation of vegetal and animal natural resources, comprising the activities of growing of crops, raising and breeding of animals, harvesting of timber and other plants, animals or animal products from a farm or their natural habitats.',
219
- This_item_also_includes: '',
220
- Rulings: '',
221
- This_item_excludes: '',
222
- "Reference_to_ISIC_Rev._4": 'A'
219
+ This_item_also_includes: nil,
220
+ Rulings: nil,
221
+ This_item_excludes: nil,
222
+ "Reference_to_ISIC_Rev._4": 'A',
223
+ rownames: 1
223
224
  },
224
225
  {
225
226
  Order: 399_476,
226
227
  Level: 4,
227
- Code: '99.00',
228
- Parent: '99.0',
228
+ Code: 99.0,
229
+ Parent: 99.0,
229
230
  Description: 'Activities of extraterritorial organisations and bodies',
230
231
  This_item_includes: "This class includes:\n- activities of international organisations such as the United Nations and the specialised agencies of the United Nations system, regional bodies etc., the International Monetary Fund, the World Bank, the World Customs Organisation, the Organisation for Economic Co-operation and Development, the organisation of Petroleum Exporting Countries, the European Communities, the European Free Trade Association etc.",
231
232
  This_item_also_includes: "This class also includes:\n- activities of diplomatic and consular missions when being determined by the country of their location rather than by the country they represent",
232
- Rulings: '',
233
- This_item_excludes: '',
234
- "Reference_to_ISIC_Rev._4": '9900'
233
+ Rulings: nil,
234
+ This_item_excludes: nil,
235
+ "Reference_to_ISIC_Rev._4": 9900,
236
+ rownames: 996
235
237
  }
236
238
  ],
237
239
  [
@@ -4,37 +4,18 @@ class SudachiSynonymDictionaryTest < Test::Unit::TestCase
4
4
  end
5
5
 
6
6
  test('#each') do
7
- records = @dataset.each.to_a
8
- assert_equal([
9
- 65206,
10
- {
11
- group_id: "000001",
12
- is_noun: true,
13
- expansion_type: :always,
14
- lexeme_id: 1,
15
- form_type: :typical,
16
- acronym_type: :typical,
17
- variant_type: :typical,
18
- categories: [],
19
- notation: "曖昧",
20
- },
21
- {
22
- group_id: "024916",
23
- is_noun: true,
24
- expansion_type: :expanded,
25
- lexeme_id: 1,
26
- form_type: :typical,
27
- acronym_type: :alphabet,
28
- variant_type: :typical,
29
- categories: ["ビジネス"],
30
- notation: "SCM",
31
- },
32
- ],
33
- [
34
- records.size,
35
- records[0].to_h,
36
- records[-1].to_h,
37
- ])
7
+ assert_equal({
8
+ group_id: "000001",
9
+ is_noun: true,
10
+ expansion_type: :always,
11
+ lexeme_id: 1,
12
+ form_type: :typical,
13
+ acronym_type: :typical,
14
+ variant_type: :typical,
15
+ categories: [],
16
+ notation: "曖昧",
17
+ },
18
+ @dataset.each.next.to_h)
38
19
  end
39
20
 
40
21
  sub_test_case('#metadata') do
@@ -7,13 +7,13 @@ class WikipediaTest < Test::Unit::TestCase
7
7
  end
8
8
 
9
9
  test("#each") do
10
- contributor = Datasets::Wikipedia::Contributor.new("Elli", 20842734)
10
+ contributor = Datasets::Wikipedia::Contributor.new("Asparagusus", 43603280)
11
11
  revision = Datasets::Wikipedia::Revision.new
12
- revision.id = 1002250816
13
- revision.parent_id = 854851586
14
- revision.timestamp = Time.iso8601("2021-01-23T15:15:01Z")
12
+ revision.id = 1219062925
13
+ revision.parent_id = 1219062840
14
+ revision.timestamp = Time.iso8601("2024-04-15T14:38:04Z")
15
15
  revision.contributor = contributor
16
- revision.comment = "shel"
16
+ revision.comment = "Restored revision 1002250816 by [[Special:Contributions/Elli|Elli]] ([[User talk:Elli|talk]]): Unexplained redirect breaking"
17
17
  revision.model = "wikitext"
18
18
  revision.format = "text/x-wiki"
19
19
  revision.text = <<-TEXT.chomp
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
8
8
  - Kouhei Sutou
9
- autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2023-05-29 00:00:00.000000000 Z
11
+ date: 2025-02-07 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: csv
@@ -160,8 +159,11 @@ files:
160
159
  - lib/datasets/geolonia.rb
161
160
  - lib/datasets/ggplot2-dataset.rb
162
161
  - lib/datasets/hepatitis.rb
162
+ - lib/datasets/house-of-councillor.rb
163
+ - lib/datasets/house-of-representative.rb
163
164
  - lib/datasets/iris.rb
164
165
  - lib/datasets/ita-corpus.rb
166
+ - lib/datasets/japanese-date-parser.rb
165
167
  - lib/datasets/kuzushiji-mnist.rb
166
168
  - lib/datasets/lazy.rb
167
169
  - lib/datasets/libsvm-dataset-list.rb
@@ -189,6 +191,7 @@ files:
189
191
  - lib/datasets/zip-extractor.rb
190
192
  - red-datasets.gemspec
191
193
  - test/helper.rb
194
+ - test/japanese-date-parser-test.rb
192
195
  - test/run-test.rb
193
196
  - test/test-adult.rb
194
197
  - test/test-afinn.rb
@@ -206,6 +209,8 @@ files:
206
209
  - test/test-fuel-economy.rb
207
210
  - test/test-geolonia.rb
208
211
  - test/test-hepatitis.rb
212
+ - test/test-house-of-councillor.rb
213
+ - test/test-house-of-representative.rb
209
214
  - test/test-iris.rb
210
215
  - test/test-ita-corpus.rb
211
216
  - test/test-kuzushiji-mnist.rb
@@ -233,7 +238,6 @@ homepage: https://github.com/red-data-tools/red-datasets
233
238
  licenses:
234
239
  - MIT
235
240
  metadata: {}
236
- post_install_message:
237
241
  rdoc_options: []
238
242
  require_paths:
239
243
  - lib
@@ -248,12 +252,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
248
252
  - !ruby/object:Gem::Version
249
253
  version: '0'
250
254
  requirements: []
251
- rubygems_version: 3.5.0.dev
252
- signing_key:
255
+ rubygems_version: 3.6.2
253
256
  specification_version: 4
254
257
  summary: Red Datasets provides classes that provide common datasets such as iris dataset.
255
258
  test_files:
256
259
  - test/helper.rb
260
+ - test/japanese-date-parser-test.rb
257
261
  - test/run-test.rb
258
262
  - test/test-adult.rb
259
263
  - test/test-afinn.rb
@@ -271,6 +275,8 @@ test_files:
271
275
  - test/test-fuel-economy.rb
272
276
  - test/test-geolonia.rb
273
277
  - test/test-hepatitis.rb
278
+ - test/test-house-of-councillor.rb
279
+ - test/test-house-of-representative.rb
274
280
  - test/test-iris.rb
275
281
  - test/test-ita-corpus.rb
276
282
  - test/test-kuzushiji-mnist.rb