red-datasets 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -2
  3. data/doc/text/news.md +86 -0
  4. data/lib/datasets/adult.rb +6 -9
  5. data/lib/datasets/afinn.rb +48 -0
  6. data/lib/datasets/aozora-bunko.rb +196 -0
  7. data/lib/datasets/cache-path.rb +28 -0
  8. data/lib/datasets/california-housing.rb +60 -0
  9. data/lib/datasets/cifar.rb +2 -4
  10. data/lib/datasets/cldr-plurals.rb +2 -4
  11. data/lib/datasets/communities.rb +5 -8
  12. data/lib/datasets/dataset.rb +8 -12
  13. data/lib/datasets/diamonds.rb +26 -0
  14. data/lib/datasets/downloader.rb +6 -1
  15. data/lib/datasets/e-stat-japan.rb +2 -1
  16. data/lib/datasets/fashion-mnist.rb +4 -0
  17. data/lib/datasets/fuel-economy.rb +35 -0
  18. data/lib/datasets/geolonia.rb +67 -0
  19. data/lib/datasets/ggplot2-dataset.rb +79 -0
  20. data/lib/datasets/hepatitis.rb +5 -8
  21. data/lib/datasets/iris.rb +5 -8
  22. data/lib/datasets/ita-corpus.rb +57 -0
  23. data/lib/datasets/kuzushiji-mnist.rb +16 -0
  24. data/lib/datasets/libsvm-dataset-list.rb +5 -8
  25. data/lib/datasets/libsvm.rb +3 -4
  26. data/lib/datasets/license.rb +26 -0
  27. data/lib/datasets/livedoor-news.rb +80 -0
  28. data/lib/datasets/metadata.rb +14 -0
  29. data/lib/datasets/mnist.rb +7 -7
  30. data/lib/datasets/mushroom.rb +5 -8
  31. data/lib/datasets/penguins.rb +4 -8
  32. data/lib/datasets/penn-treebank.rb +2 -4
  33. data/lib/datasets/pmjt-dataset-list.rb +67 -0
  34. data/lib/datasets/postal-code-japan.rb +2 -6
  35. data/lib/datasets/quora-duplicate-question-pair.rb +51 -0
  36. data/lib/datasets/{rdatasets.rb → rdataset.rb} +66 -15
  37. data/lib/datasets/seaborn.rb +90 -0
  38. data/lib/datasets/sudachi-synonym-dictionary.rb +5 -11
  39. data/lib/datasets/version.rb +1 -1
  40. data/lib/datasets/wikipedia-kyoto-japanese-english.rb +219 -0
  41. data/lib/datasets/wikipedia.rb +4 -5
  42. data/lib/datasets/wine.rb +6 -9
  43. data/lib/datasets/zip-extractor.rb +36 -0
  44. data/lib/datasets.rb +14 -2
  45. data/red-datasets.gemspec +1 -1
  46. data/test/helper.rb +21 -0
  47. data/test/test-afinn.rb +60 -0
  48. data/test/test-aozora-bunko.rb +190 -0
  49. data/test/test-california-housing.rb +56 -0
  50. data/test/test-cldr-plurals.rb +1 -1
  51. data/test/test-dataset.rb +15 -7
  52. data/test/test-diamonds.rb +71 -0
  53. data/test/test-fuel-economy.rb +75 -0
  54. data/test/test-geolonia.rb +64 -0
  55. data/test/test-ita-corpus.rb +69 -0
  56. data/test/test-kuzushiji-mnist.rb +137 -0
  57. data/test/test-license.rb +24 -0
  58. data/test/test-livedoor-news.rb +351 -0
  59. data/test/test-metadata.rb +36 -0
  60. data/test/test-penguins.rb +1 -1
  61. data/test/test-pmjt-dataset-list.rb +50 -0
  62. data/test/test-quora-duplicate-question-pair.rb +33 -0
  63. data/test/test-rdataset.rb +246 -0
  64. data/test/{test-seaborn-data.rb → test-seaborn.rb} +70 -4
  65. data/test/test-sudachi-synonym-dictionary.rb +5 -5
  66. data/test/test-wikipedia-kyoto-japanese-english.rb +178 -0
  67. metadata +58 -14
  68. data/lib/datasets/seaborn-data.rb +0 -49
  69. data/test/test-rdatasets.rb +0 -136
@@ -0,0 +1,36 @@
1
require 'zip'

module Datasets
  # Thin wrapper around rubyzip's Zip::File for pulling single entries
  # out of a local zip archive without unpacking it to disk.
  class ZipExtractor
    # @param path [String, Pathname] path to the zip archive
    def initialize(path)
      @path = path
    end

    # Yields an input stream for the first regular-file entry in the
    # archive and returns the block's result.
    #
    # @return [Object, nil] the block's return value, or nil when the
    #   archive contains no file entries
    def extract_first_file(&block)
      # Any file entry matches; the walk stops at the first one.
      extract_matching_entry(->(_entry) { true }, &block)
    end

    # Yields an input stream for the file entry whose name is exactly
    # +file_path+ and returns the block's result.
    #
    # @param file_path [String] entry name inside the archive
    # @return [Object, nil] the block's return value, or nil when no
    #   such file entry exists
    def extract_file(file_path, &block)
      extract_matching_entry(->(entry) { entry.name == file_path }, &block)
    end

    private

    # Shared archive walk used by both public methods: opens the
    # archive and yields the input stream of the first regular-file
    # entry accepted by +matcher+. Returns nil when nothing matches.
    def extract_matching_entry(matcher)
      Zip::File.open(@path) do |zip_file|
        zip_file.each do |entry|
          # Skip directories and other non-file entries first, then
          # apply the caller's predicate (same order as before the
          # refactoring).
          next unless entry.file?
          next unless matcher.call(entry)

          entry.get_input_stream do |input|
            # Non-local return: propagates the block's value straight
            # out through the calling public method.
            return yield(input)
          end
        end
      end
      nil
    end
  end
end
data/lib/datasets.rb CHANGED
@@ -1,22 +1,34 @@
1
1
  require_relative "datasets/version"
2
2
 
3
3
  require_relative "datasets/adult"
4
+ require_relative "datasets/afinn"
5
+ require_relative "datasets/aozora-bunko"
6
+ require_relative "datasets/california-housing"
4
7
  require_relative "datasets/cifar"
5
8
  require_relative "datasets/cldr-plurals"
6
9
  require_relative "datasets/communities"
10
+ require_relative "datasets/diamonds"
7
11
  require_relative "datasets/e-stat-japan"
8
12
  require_relative "datasets/fashion-mnist"
13
+ require_relative "datasets/fuel-economy"
14
+ require_relative "datasets/geolonia"
9
15
  require_relative "datasets/hepatitis"
10
16
  require_relative "datasets/iris"
17
+ require_relative "datasets/ita-corpus"
18
+ require_relative "datasets/kuzushiji-mnist"
11
19
  require_relative "datasets/libsvm"
12
20
  require_relative "datasets/libsvm-dataset-list"
21
+ require_relative "datasets/livedoor-news"
13
22
  require_relative "datasets/mnist"
14
23
  require_relative "datasets/mushroom"
15
24
  require_relative "datasets/penguins"
16
25
  require_relative "datasets/penn-treebank"
26
+ require_relative "datasets/pmjt-dataset-list"
17
27
  require_relative "datasets/postal-code-japan"
18
- require_relative "datasets/rdatasets"
19
- require_relative "datasets/seaborn-data"
28
+ require_relative "datasets/quora-duplicate-question-pair"
29
+ require_relative "datasets/rdataset"
30
+ require_relative "datasets/seaborn"
20
31
  require_relative "datasets/sudachi-synonym-dictionary"
21
32
  require_relative "datasets/wikipedia"
33
+ require_relative "datasets/wikipedia-kyoto-japanese-english"
22
34
  require_relative "datasets/wine"
data/red-datasets.gemspec CHANGED
@@ -34,7 +34,7 @@ Gem::Specification.new do |spec|
34
34
  spec.files += Dir.glob("doc/text/*")
35
35
  spec.test_files += Dir.glob("test/**/*")
36
36
 
37
- spec.add_runtime_dependency("csv", ">= 3.0.5")
37
+ spec.add_runtime_dependency("csv", ">= 3.2.4")
38
38
  spec.add_runtime_dependency("rexml")
39
39
  spec.add_runtime_dependency("rubyzip")
40
40
 
data/test/helper.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "fileutils"
2
2
  require "pathname"
3
3
  require "time"
4
+ require "tmpdir"
4
5
 
5
6
  require "datasets"
6
7
 
@@ -18,4 +19,24 @@ module Helper
18
19
  FileUtils.rm_rf(@tmp_dir)
19
20
  end
20
21
  end
22
+
23
+ module PathRestorable
24
+ def restore_path(path)
25
+ unless path.exist?
26
+ return yield
27
+ end
28
+
29
+ Dir.mktmpdir do |dir|
30
+ FileUtils.cp_r(path, dir, preserve: true)
31
+ begin
32
+ yield
33
+ ensure
34
+ FileUtils.rmtree(path, secure: true) if path.exist?
35
+ FileUtils.cp_r(Pathname(dir) + path.basename,
36
+ path,
37
+ preserve: true)
38
+ end
39
+ end
40
+ end
41
+ end
21
42
  end
@@ -0,0 +1,60 @@
1
# Tests for Datasets::AFINN (AFINN-111 sentiment word list).
# NOTE(review): Datasets::AFINN presumably downloads the list on first
# use, so a network connection is needed on a cold cache — confirm.
class AFINNTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::AFINN.new
  end

  test('#each') do
    records = @dataset.each.to_a
    # Pin the record count plus the first and last records to catch
    # both parser regressions and upstream data changes.
    assert_equal([
                   2477,
                   {
                     :valence => -2,
                     :word => "abandon"
                   },
                   {
                     :valence => 2,
                     :word => "zealous"
                   },
                 ],
                 [
                   records.size,
                   records[0].to_h,
                   records[-1].to_h,
                 ])
  end

  sub_test_case('#metadata') do
    test('#description') do
      description = @dataset.metadata.description
      # The expected text is the upstream dataset description verbatim;
      # .chomp drops the heredoc's trailing newline.
      assert_equal(<<-DESCRIPTION.chomp, description)
AFINN is a list of English words rated for valence with an integer
between minus five (negative) and plus five (positive). The words have
been manually labeled by Finn Årup Nielsen in 2009-2011. The file
is tab-separated. There are two versions:

AFINN-111: Newest version with 2477 words and phrases.

An evaluation of the word list is available in:

Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for
sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903

The list was used in:

Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni,
Michael Etter, "Good Friends, Bad News - Affect and Virality in
Twitter", The 2011 International Workshop on Social Computing,
Network, and Services (SocialComNet 2011).


This database of words is copyright protected and distributed under
"Open Database License (ODbL) v1.0"
http://www.opendatacommons.org/licenses/odbl/1.0/ or a similar
copyleft license.

See comments on the word list here:
http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis
      DESCRIPTION
    end
  end
end
@@ -0,0 +1,190 @@
1
# Tests for Datasets::AozoraBunko (Aozora Bunko book catalog) and its
# Book record class. Several tests fetch real files from
# www.aozora.gr.jp, so they require network access on a cold cache.
class AozoraBunkoTest < Test::Unit::TestCase
  include Helper::PathRestorable

  def setup
    @dataset = Datasets::AozoraBunko.new
    # cache_path is private on the dataset; reached via #send.
    @cache_path = @dataset.send(:cache_path)
  end

  test('#new') do
    # Pin the full attribute hash of the first catalog entry.
    assert_equal({
                   title_id: '059898',
                   title: 'ウェストミンスター寺院',
                   title_reading: 'ウェストミンスターじいん',
                   title_reading_collation: 'うえすとみんすたあしいん',
                   subtitle: '',
                   subtitle_reading: '',
                   original_title: '',
                   first_appearance: '',
                   ndc_code: 'NDC 933',
                   syllabary_spelling_type: '新字新仮名',
                   copyrighted: false,
                   published_date: '2020-04-03',
                   last_updated_date: '2020-03-28',
                   detail_url: 'https://www.aozora.gr.jp/cards/001257/card59898.html',
                   person_id: '001257',
                   person_family_name: 'アーヴィング',
                   person_first_name: 'ワシントン',
                   person_family_name_reading: 'アーヴィング',
                   person_first_name_reading: 'ワシントン',
                   person_family_name_reading_collation: 'ああういんく',
                   person_first_name_reading_collation: 'わしんとん',
                   person_family_name_romaji: 'Irving',
                   person_first_name_romaji: 'Washington',
                   person_type: '著者',
                   person_birthday: '1783-04-03',
                   person_date_of_death: '1859-11-28',
                   person_copyrighted: false,
                   original_book_name1: 'スケッチ・ブック',
                   original_book_publisher_name1: '新潮文庫、新潮社',
                   original_book_first_published_date1: '1957(昭和32)年5月20日',
                   used_version_for_registration1: '2000(平成12)年2月20日33刷改版',
                   used_version_for_proofreading1: '2000(平成12)年2月20日33刷改版',
                   base_of_original_book_name1: '',
                   base_of_original_book_publisher_name1: '',
                   base_of_original_book_first_published_date1: '',
                   original_book_name2: '',
                   original_book_publisher_name2: '',
                   original_book_first_published_date2: '',
                   used_version_for_registration2: '',
                   used_version_for_proofreading2: '',
                   base_of_original_book_name2: '',
                   base_of_original_book_publisher_name2: '',
                   base_of_original_book_first_published_date2: '',
                   registered_person_name: 'えにしだ',
                   proofreader_name: '砂場清隆',
                   text_file_url: 'https://www.aozora.gr.jp/cards/001257/files/59898_ruby_70679.zip',
                   last_text_file_updated_date: '2020-03-28',
                   text_file_character_encoding: 'ShiftJIS',
                   text_file_character_set: 'JIS X 0208',
                   text_file_updating_count: '0',
                   html_file_url: 'https://www.aozora.gr.jp/cards/001257/files/59898_70731.html',
                   last_html_file_updated_date: '2020-03-28',
                   html_file_character_encoding: 'ShiftJIS',
                   html_file_character_set: 'JIS X 0208',
                   html_file_updating_count: '0'

                 },
                 @dataset.first.to_h)
  end

  sub_test_case(:Book) do
    sub_test_case('#text') do
      test('readable') do
        # A book whose text file URL is a zip hosted on aozora.gr.jp
        # should yield readable, ShiftJIS-decoded text.
        book = Datasets::AozoraBunko::Book.new
        book.cache_path = @cache_path
        book.title_id = '059898'
        book.person_id = '001257'
        book.text_file_url = 'https://www.aozora.gr.jp/cards/001257/files/59898_ruby_70679.zip'
        book.text_file_character_encoding = 'ShiftJIS'

        # Check the first and last 10 characters of the decoded text.
        assert_equal([
                       'ウェストミンスター寺',
                       "アの皆さんです。\r\n"
                     ],
                     [
                       book.text[0, 10],
                       book.text[-10, 10]
                     ])
      end

      test('not readable') do
        # A text file hosted outside aozora.gr.jp is not fetched.
        book = Datasets::AozoraBunko::Book.new
        book.text_file_url = 'https://mega.nz/file/6tMxgAjZ#PglDDyJL0syRhnULqK0qhTMC7cktsgqwObj5fY_knpE'

        assert_equal(nil, book.text)
      end
    end

    sub_test_case('#html') do
      sub_test_case('readable') do
        test('encoding is ShiftJIS') do
          book = Datasets::AozoraBunko::Book.new
          book.cache_path = @cache_path
          book.title_id = '059898'
          book.person_id = '001257'
          book.html_file_url = 'https://www.aozora.gr.jp/cards/001257/files/59898_70731.html'
          book.html_file_character_encoding = 'ShiftJIS'

          # The <title> element sits on line 9 of this document.
          assert_equal("<title>ワシントン・アーヴィング Washington Irving 吉田甲子太郎訳 ウェストミンスター寺院</title>",
                       book.html.split("\n")[8].strip)
        end

        test('encoding is UTF-8') do
          book = Datasets::AozoraBunko::Book.new
          book.cache_path = @cache_path

          book.title_id = '000750'
          book.person_id = '000146'
          book.html_file_url = 'http://www.lcv.ne.jp/~ibs52086/fire/'
          book.html_file_character_encoding = 'UTF-8'

          assert_equal('<title>種田山頭火句集 | 『草木塔抄』他 FIRE ON THE MOUNTAIN</title>',
                       book.html.split("\n")[7])
        end
      end

      test('not readable') do
        # No HTML URL at all: #html returns nil instead of raising.
        book = Datasets::AozoraBunko::Book.new
        book.html_file_url = ''

        assert_equal(nil, book.html)
      end
    end

    sub_test_case('converting boolean') do
      # The CSV source stores booleans as text; Book exposes them as
      # real false/true via the plain reader, the ? predicate and to_h.
      test('#person_copyrighted?') do
        book = @dataset.first
        assert_equal([
                       false,
                       false,
                       false,
                     ],
                     [
                       book.person_copyrighted?,
                       book.person_copyrighted,
                       book.to_h[:person_copyrighted],
                     ])
      end

      test('#copyrighted?') do
        book = @dataset.first
        assert_equal([
                       false,
                       false,
                       false,
                     ],
                     [
                       book.copyrighted?,
                       book.copyrighted,
                       book.to_h[:copyrighted],
                     ])
      end
    end

    test('#clear_cache! removes all cache files') do
      book = Datasets::AozoraBunko::Book.new
      book.cache_path = @cache_path

      book.title_id = '059898'
      book.person_id = '001257'
      book.text_file_url = 'https://www.aozora.gr.jp/cards/001257/files/59898_ruby_70679.zip'
      book.text_file_character_encoding = 'ShiftJIS'
      book.html_file_url = 'https://www.aozora.gr.jp/cards/001257/files/59898_70731.html'
      book.html_file_character_encoding = 'ShiftJIS'

      # Force both the text and HTML downloads so cache files exist.
      book.text
      book.html

      # restore_path puts the cache back after this destructive test
      # (see Helper::PathRestorable).
      restore_path(@cache_path.base_dir) do
        assert_equal(true, @cache_path.base_dir.exist?)
        assert_equal(true, book.send(:text_file_output_path).exist?)
        assert_equal(true, book.send(:html_file_output_path).exist?)
        @dataset.clear_cache!
        assert_equal(false, book.send(:html_file_output_path).exist?)
        assert_equal(false, book.send(:text_file_output_path).exist?)
        assert_equal(false, @cache_path.base_dir.exist?)
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
# Tests for Datasets::CaliforniaHousing (1990 census housing data).
# NOTE(review): the dataset is presumably downloaded on first use —
# confirm network requirements before running on a cold cache.
class CaliforniaHousingTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::CaliforniaHousing.new
  end

  # Convenience constructor for expected records (kept for parity with
  # the other dataset tests even though this file pins plain hashes).
  def record(*args)
    Datasets::CaliforniaHousing::Record.new(*args)
  end

  test("#each") do
    records = @dataset.each.to_a
    # Pin record count plus the first and last rows to detect parsing
    # and upstream-data regressions.
    assert_equal([
                   20640,
                   {
                     median_house_value: 452600.000000,
                     median_income: 8.325200,
                     housing_median_age: 41.000000,
                     total_rooms: 880.000000,
                     total_bedrooms: 129.000000,
                     population: 322.000000,
                     households: 126.000000,
                     latitude: 37.880000,
                     longitude: -122.230000
                   },
                   {
                     median_house_value: 89400.000000,
                     median_income: 2.388600,
                     housing_median_age: 16.000000,
                     total_rooms: 2785.000000,
                     total_bedrooms: 616.000000,
                     population: 1387.000000,
                     households: 530.000000,
                     latitude: 39.370000,
                     longitude: -121.240000
                   },
                 ],
                 [
                   records.size,
                   records[0].to_h,
                   records[-1].to_h
                 ])
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      # Expected text is the upstream dataset citation verbatim.
      assert_equal(<<-DESCRIPTION, description)
Housing information from the 1990 census used in
Pace, R. Kelley and Ronald Barry,
"Sparse Spatial Autoregressions",
Statistics and Probability Letters, 33 (1997) 291-297.
Available from http://lib.stat.cmu.edu/datasets/.
      DESCRIPTION
    end
  end
end
@@ -14,7 +14,7 @@ class CLDRPluralsTest < Test::Unit::TestCase
14
14
  test("#each") do
15
15
  locales = @dataset.each.to_a
16
16
  assert_equal([
17
- 218,
17
+ 219,
18
18
  locale("bm",
19
19
  [
20
20
  rule("other",
data/test/test-dataset.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  class TestDataset < Test::Unit::TestCase
2
2
  sub_test_case("#clear_cache!") do
3
+ include Helper::PathRestorable
4
+
3
5
  def setup
4
6
  @dataset = Datasets::Iris.new
5
7
  @cache_dir_path = @dataset.send(:cache_dir_path)
@@ -9,18 +11,24 @@ class TestDataset < Test::Unit::TestCase
9
11
  @dataset.first # This ensures the dataset downloaded
10
12
  existence = {before: @cache_dir_path.join("iris.csv").exist?}
11
13
 
12
- @dataset.clear_cache!
13
- existence[:after] = @cache_dir_path.join("iris.csv").exist?
14
+ restore_path(@cache_dir_path) do
15
+ @dataset.clear_cache!
16
+ existence[:after] = @cache_dir_path.join("iris.csv").exist?
14
17
 
15
- assert_equal({before: true, after: false},
16
- existence)
18
+ assert_equal({before: true, after: false},
19
+ existence)
20
+ end
17
21
  end
18
22
 
19
23
  test("when the dataset is not downloaded") do
20
- FileUtils.rmtree(@cache_dir_path.to_s, secure: true) if @cache_dir_path.exist?
24
+ restore_path(@cache_dir_path) do
25
+ if @cache_dir_path.exist?
26
+ FileUtils.rmtree(@cache_dir_path.to_s, secure: true)
27
+ end
21
28
 
22
- assert_nothing_raised do
23
- @dataset.clear_cache!
29
+ assert_nothing_raised do
30
+ @dataset.clear_cache!
31
+ end
24
32
  end
25
33
  end
26
34
  end
@@ -0,0 +1,71 @@
1
# Tests for Datasets::Diamonds (the ggplot2 diamonds dataset).
# NOTE(review): presumably downloaded on first use — confirm network
# requirements before running on a cold cache.
class DiamondsTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::Diamonds.new
  end

  # Convenience constructor for expected records (kept for parity with
  # the other dataset tests even though this file pins plain hashes).
  def record(*args)
    Datasets::Diamonds::Record.new(*args)
  end

  test("#each") do
    records = @dataset.each.to_a
    # Pin record count plus the first and last rows to detect parsing
    # and upstream-data regressions.
    assert_equal([
                   53940,
                   {
                     carat: 0.23,
                     clarity: "SI2",
                     color: "E",
                     cut: "Ideal",
                     depth: 61.5,
                     price: 326,
                     table: 55.0,
                     x: 3.95,
                     y: 3.98,
                     z: 2.43,
                   },
                   {
                     carat: 0.75,
                     clarity: "SI2",
                     color: "D",
                     cut: "Ideal",
                     depth: 62.2,
                     price: 2757,
                     table: 55.0,
                     x: 5.83,
                     y: 5.87,
                     z: 3.64,
                   },
                 ],
                 [
                   records.size,
                   records[0].to_h,
                   records[-1].to_h
                 ])
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      # Expected text mirrors the ggplot2 documentation for `diamonds`.
      assert_equal(<<-DESCRIPTION, description)
Prices of over 50,000 round cut diamonds

A dataset containing the prices and other attributes of almost 54,000
diamonds. The variables are as follows:

A data frame with 53940 rows and 10 variables:

* price: price in US dollars ($326--$18,823)
* carat: weight of the diamond (0.2--5.01)
* cut: quality of the cut (Fair, Good, Very Good, Premium, Ideal)
* color: diamond colour, from D (best) to J (worst)
* clarity: a measurement of how clear the diamond is (I1 (worst), SI2,
SI1, VS2, VS1, VVS2, VVS1, IF (best))
* x: length in mm (0--10.74)
* y: width in mm (0--58.9)
* z: depth in mm (0--31.8)
* depth: total depth percentage = z / mean(x, y) = 2 * z / (x + y) (43--79)
* table: width of top of diamond relative to widest point (43--95)
      DESCRIPTION
    end
  end
end
@@ -0,0 +1,75 @@
1
# Tests for Datasets::FuelEconomy (the ggplot2 mpg dataset).
# NOTE(review): presumably downloaded on first use — confirm network
# requirements before running on a cold cache.
class FuelEconomyTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::FuelEconomy.new
  end

  # Convenience constructor for expected records (kept for parity with
  # the other dataset tests even though this file pins plain hashes).
  def record(*args)
    Datasets::FuelEconomy::Record.new(*args)
  end

  test("#each") do
    records = @dataset.each.to_a
    # Pin record count plus the first and last rows to detect parsing
    # and upstream-data regressions.
    assert_equal([
                   234,
                   {
                     city_mpg: 18,
                     displacement: 1.8,
                     drive_train: "f",
                     fuel: "p",
                     highway_mpg: 29,
                     manufacturer: "audi",
                     model: "a4",
                     n_cylinders: 4,
                     transmission: "auto(l5)",
                     type: "compact",
                     year: 1999
                   },
                   {
                     city_mpg: 17,
                     displacement: 3.6,
                     drive_train: "f",
                     fuel: "p",
                     highway_mpg: 26,
                     manufacturer: "volkswagen",
                     model: "passat",
                     n_cylinders: 6,
                     transmission: "auto(s6)",
                     type: "midsize",
                     year: 2008
                   },
                 ],
                 [
                   records.size,
                   records[0].to_h,
                   records[-1].to_h
                 ])
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      # Expected text mirrors the ggplot2 documentation for `mpg`.
      assert_equal(<<-DESCRIPTION, description)
Fuel economy data from 1999 to 2008 for 38 popular models of cars

This dataset contains a subset of the fuel economy data that the EPA makes
available on https://fueleconomy.gov/. It contains only models which
had a new release every year between 1999 and 2008 - this was used as a
proxy for the popularity of the car.

A data frame with 234 rows and 11 variables:

* manufacturer: manufacturer name
* model: model name
* displacement: engine displacement, in litres
* year: year of manufacture
* n_cylinders: number of cylinders
* transmissions: type of transmission
* drive_train: the type of drive train, where f = front-wheel drive, r = rear wheel drive, 4 = 4wd
* city_mpg: city miles per gallon
* highway_mpg: highway miles per gallon
* fuel: fuel type
* type: "type" of car
      DESCRIPTION
    end
  end
end
@@ -0,0 +1,64 @@
1
# Tests for Datasets::Geolonia (Geolonia Japanese address data).
# NOTE(review): presumably downloaded on first use — confirm network
# requirements before running on a cold cache.
class GeoloniaTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::Geolonia.new
  end

  test('#each') do
    records = @dataset.each.to_a
    # Pin record count plus the first (Hokkaido) and last (Okinawa)
    # rows to detect parsing and upstream-data regressions.
    assert_equal([
                   277191,
                   {
                     :prefecture_code => "01",
                     :prefecture_name => "北海道",
                     :prefecture_kana => "ホッカイドウ",
                     :prefecture_romaji => "HOKKAIDO",
                     :municipality_code => "01101",
                     :municipality_name => "札幌市中央区",
                     :municipality_kana => "サッポロシチュウオウク",
                     :municipality_romaji => "SAPPORO SHI CHUO KU",
                     :street_name => "旭ケ丘一丁目",
                     :street_kana => "アサヒガオカ 1",
                     :street_romaji => "ASAHIGAOKA 1",
                     :alias => nil,
                     :latitude => "43.04223",
                     :longitude => "141.319722"
                   },
                   {
                     :prefecture_code => "47",
                     :prefecture_name => "沖縄県",
                     :prefecture_kana => "オキナワケン",
                     :prefecture_romaji => "OKINAWA KEN",
                     :municipality_code => "47325",
                     :municipality_name => "中頭郡嘉手納町",
                     :municipality_kana => "ナカガミグンカデナチョウ",
                     :municipality_romaji => "NAKAGAMI GUN KADENA CHO",
                     :street_name => "字兼久",
                     :street_kana => nil,
                     :street_romaji => nil,
                     :alias => "下原",
                     :latitude => "26.351841",
                     :longitude => "127.744975",
                   },
                 ],
                 [
                   records.size,
                   records[0].to_h,
                   records[-1].to_h,
                 ])
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      # Rather than pinning the full (Japanese) README, only assert the
      # Markdown heading structure; the third assert_equal argument
      # prints the whole description on failure.
      assert_equal([
                     "# Geolonia 住所データ",
                     "## 住所データ仕様",
                     "### ファイルフォーマット",
                     "### 列",
                   ],
                   description.scan(/^#.*$/),
                   description)
    end
  end

end