red-datasets 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/news.md +18 -0
- data/lib/datasets.rb +1 -0
- data/lib/datasets/adult.rb +4 -3
- data/lib/datasets/downloader.rb +3 -1
- data/lib/datasets/hepatitis.rb +207 -0
- data/lib/datasets/version.rb +1 -1
- data/test/test-hepatitis.rb +74 -0
- metadata +17 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 120492172aae9cec1c4fc4f3b73575cb5349caf2f0b67d70676c8896324e1491
|
4
|
+
data.tar.gz: e46eb3f2875cb407e86cc0976eff7d612beb62ca6b421a51435b5d5e1bfa6e03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 360bbf78c131f20a67359ddc2055cd58502da1f4e95adf30475cd405d5eb50be6ba4fd9aa0a0857226dc803e14282cc4231de113843e96657a65e287c7500137
|
7
|
+
data.tar.gz: f88ed1ae8c8f0dad9f4d8904a265c833ceee723ba92860c0e3bed4c193d56a901c31184abd4290058de47fbc089b12b4d3b1da064f138214e2954d45eee928da
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,23 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.1.0 - 2020-02-04
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for Ruby 2.7.
|
8
|
+
[GitHub#82][GitHub#83][Patch by Yasuo Honda]
|
9
|
+
|
10
|
+
* `Datasets::Hepatitis`: Added.
|
11
|
+
[GitHub#70][Patch by KazuhiroYoshimoto]
|
12
|
+
|
13
|
+
* `Datasets::Downloader`: Added support for query.
|
14
|
+
|
15
|
+
### Thanks
|
16
|
+
|
17
|
+
* Yasuo Honda
|
18
|
+
|
19
|
+
* KazuhiroYoshimoto
|
20
|
+
|
3
21
|
## 0.0.9 - 2019-09-09
|
4
22
|
|
5
23
|
### Improvements
|
data/lib/datasets.rb
CHANGED
@@ -3,6 +3,7 @@ require_relative "datasets/version"
|
|
3
3
|
require_relative "datasets/adult"
|
4
4
|
require_relative "datasets/cifar"
|
5
5
|
require_relative "datasets/fashion-mnist"
|
6
|
+
require_relative "datasets/hepatitis"
|
6
7
|
require_relative "datasets/iris"
|
7
8
|
require_relative "datasets/libsvm"
|
8
9
|
require_relative "datasets/libsvm-dataset-list"
|
data/lib/datasets/adult.rb
CHANGED
@@ -62,11 +62,12 @@ module Datasets
|
|
62
62
|
data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.#{ext}"
|
63
63
|
download(data_path, data_url)
|
64
64
|
end
|
65
|
-
|
66
|
-
|
65
|
+
|
66
|
+
options = {
|
67
67
|
converters: [:numeric, lambda {|f| f.strip}],
|
68
68
|
skip_lines: /\A\|/,
|
69
|
-
|
69
|
+
}
|
70
|
+
CSV.open(data_path, **options) do |csv|
|
70
71
|
yield(csv)
|
71
72
|
end
|
72
73
|
end
|
data/lib/datasets/downloader.rb
CHANGED
@@ -34,7 +34,9 @@ module Datasets
|
|
34
34
|
Net::HTTP.start(@url.hostname,
|
35
35
|
@url.port,
|
36
36
|
:use_ssl => (@url.scheme == "https")) do |http|
|
37
|
-
|
37
|
+
path = @url.path
|
38
|
+
path += "?#{@url.query}" if @url.query
|
39
|
+
request = Net::HTTP::Get.new(path, headers)
|
38
40
|
http.request(request) do |response|
|
39
41
|
case response
|
40
42
|
when Net::HTTPPartialContent
|
data/lib/datasets/hepatitis.rb
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
require "csv"
|
2
|
+
|
3
|
+
require_relative "dataset"
|
4
|
+
|
5
|
+
module Datasets
|
6
|
+
class Hepatitis < Dataset
|
7
|
+
class Record < Struct.new(:label,
|
8
|
+
:age,
|
9
|
+
:sex,
|
10
|
+
:steroid,
|
11
|
+
:antivirals,
|
12
|
+
:fatigue,
|
13
|
+
:malaise,
|
14
|
+
:anorexia,
|
15
|
+
:liver_big,
|
16
|
+
:liver_firm,
|
17
|
+
:spleen_palpable,
|
18
|
+
:spiders,
|
19
|
+
:ascites,
|
20
|
+
:varices,
|
21
|
+
:bilirubin,
|
22
|
+
:alkaline_phosphate,
|
23
|
+
:sgot,
|
24
|
+
:albumin,
|
25
|
+
:protime,
|
26
|
+
:histology)
|
27
|
+
def initialize(*values)
|
28
|
+
super()
|
29
|
+
members.zip(values) do |member, value|
|
30
|
+
__send__("#{member}=", value)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def label=(label)
|
35
|
+
case label
|
36
|
+
when "1"
|
37
|
+
super(:die)
|
38
|
+
when "2"
|
39
|
+
super(:live)
|
40
|
+
else
|
41
|
+
super(label)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def age=(age)
|
46
|
+
super(normalize_integer(age))
|
47
|
+
end
|
48
|
+
|
49
|
+
def sex=(sex)
|
50
|
+
case sex
|
51
|
+
when "1"
|
52
|
+
super(:male)
|
53
|
+
when "2"
|
54
|
+
super(:female)
|
55
|
+
else
|
56
|
+
super(sex)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def steroid=(steroid)
|
61
|
+
super(normalize_boolean(steroid))
|
62
|
+
end
|
63
|
+
|
64
|
+
def antivirals=(antivirals)
|
65
|
+
super(normalize_boolean(antivirals))
|
66
|
+
end
|
67
|
+
|
68
|
+
def fatigue=(fatigue)
|
69
|
+
super(normalize_boolean(fatigue))
|
70
|
+
end
|
71
|
+
|
72
|
+
def malaise=(malaise)
|
73
|
+
super(normalize_boolean(malaise))
|
74
|
+
end
|
75
|
+
|
76
|
+
def anorexia=(anorexia)
|
77
|
+
super(normalize_boolean(anorexia))
|
78
|
+
end
|
79
|
+
|
80
|
+
def liver_big=(liver_big)
|
81
|
+
super(normalize_boolean(liver_big))
|
82
|
+
end
|
83
|
+
|
84
|
+
def liver_firm=(liver_firm)
|
85
|
+
super(normalize_boolean(liver_firm))
|
86
|
+
end
|
87
|
+
|
88
|
+
def spleen_palpable=(spleen_palpable)
|
89
|
+
super(normalize_boolean(spleen_palpable))
|
90
|
+
end
|
91
|
+
|
92
|
+
def spiders=(spiders)
|
93
|
+
super(normalize_boolean(spiders))
|
94
|
+
end
|
95
|
+
|
96
|
+
def ascites=(ascites)
|
97
|
+
super(normalize_boolean(ascites))
|
98
|
+
end
|
99
|
+
|
100
|
+
def varices=(varices)
|
101
|
+
super(normalize_boolean(varices))
|
102
|
+
end
|
103
|
+
|
104
|
+
def bilirubin=(bilirubin)
|
105
|
+
super(normalize_float(bilirubin))
|
106
|
+
end
|
107
|
+
|
108
|
+
def alkaline_phosphate=(alkaline_phosphate)
|
109
|
+
super(normalize_integer(alkaline_phosphate))
|
110
|
+
end
|
111
|
+
|
112
|
+
def sgot=(sgot)
|
113
|
+
super(normalize_integer(sgot))
|
114
|
+
end
|
115
|
+
|
116
|
+
def albumin=(albumin)
|
117
|
+
super(normalize_float(albumin))
|
118
|
+
end
|
119
|
+
|
120
|
+
def protime=(protime)
|
121
|
+
super(normalize_integer(protime))
|
122
|
+
end
|
123
|
+
|
124
|
+
def histology=(histology)
|
125
|
+
super(normalize_boolean(histology))
|
126
|
+
end
|
127
|
+
|
128
|
+
private
|
129
|
+
def normalize_boolean(value)
|
130
|
+
case value
|
131
|
+
when "?"
|
132
|
+
nil
|
133
|
+
when "1"
|
134
|
+
false
|
135
|
+
when "2"
|
136
|
+
true
|
137
|
+
else
|
138
|
+
value
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def normalize_float(value)
|
143
|
+
case value
|
144
|
+
when "?"
|
145
|
+
nil
|
146
|
+
else
|
147
|
+
Float(value)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def normalize_integer(value)
|
152
|
+
case value
|
153
|
+
when "?"
|
154
|
+
nil
|
155
|
+
else
|
156
|
+
Integer(value, 10)
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
def initialize
|
162
|
+
super()
|
163
|
+
@metadata.id = "hepatitis"
|
164
|
+
@metadata.name = "Hepatitis"
|
165
|
+
@metadata.url = "https://archive.ics.uci.edu/ml/datasets/hepatitis"
|
166
|
+
@metadata.description = lambda do
|
167
|
+
read_names
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def each
|
172
|
+
return to_enum(__method__) unless block_given?
|
173
|
+
|
174
|
+
open_data do |csv|
|
175
|
+
csv.each do |row|
|
176
|
+
record = Record.new(*row)
|
177
|
+
yield(record)
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
private
|
183
|
+
def base_url
|
184
|
+
"https://archive.ics.uci.edu/ml/machine-learning-databases/hepatitis"
|
185
|
+
end
|
186
|
+
|
187
|
+
def open_data
|
188
|
+
data_path = cache_dir_path + "hepatitis.csv"
|
189
|
+
unless data_path.exist?
|
190
|
+
data_url = "#{base_url}/hepatitis.data"
|
191
|
+
download(data_path, data_url)
|
192
|
+
end
|
193
|
+
CSV.open(data_path) do |csv|
|
194
|
+
yield(csv)
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
def read_names
|
199
|
+
names_path = cache_dir_path + "hepatitis.names"
|
200
|
+
unless names_path.exist?
|
201
|
+
names_url = "#{base_url}/hepatitis.names"
|
202
|
+
download(names_path, names_url)
|
203
|
+
end
|
204
|
+
names_path.read
|
205
|
+
end
|
206
|
+
end
|
207
|
+
end
|
data/lib/datasets/version.rb
CHANGED
data/test/test-hepatitis.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
class HepatitisTest < Test::Unit::TestCase
|
2
|
+
def setup
|
3
|
+
@dataset = Datasets::Hepatitis.new
|
4
|
+
end
|
5
|
+
|
6
|
+
def record(*args)
|
7
|
+
Datasets::Hepatitis::Record.new(*args)
|
8
|
+
end
|
9
|
+
|
10
|
+
test("#each") do
|
11
|
+
records = @dataset.each.to_a
|
12
|
+
assert_equal([
|
13
|
+
155,
|
14
|
+
{
|
15
|
+
:label => :live,
|
16
|
+
:age => 30,
|
17
|
+
:sex => :female,
|
18
|
+
:steroid => false,
|
19
|
+
:antivirals => true,
|
20
|
+
:fatigue => true,
|
21
|
+
:malaise => true,
|
22
|
+
:anorexia => true,
|
23
|
+
:liver_big => false,
|
24
|
+
:liver_firm => true,
|
25
|
+
:spleen_palpable => true,
|
26
|
+
:spiders => true,
|
27
|
+
:ascites => true,
|
28
|
+
:varices => true,
|
29
|
+
:bilirubin => 1.0,
|
30
|
+
:alkaline_phosphate => 85,
|
31
|
+
:sgot => 18,
|
32
|
+
:albumin => 4.0,
|
33
|
+
:protime => nil,
|
34
|
+
:histology => false,
|
35
|
+
},
|
36
|
+
{
|
37
|
+
:label => :die,
|
38
|
+
:age => 43,
|
39
|
+
:sex => :male,
|
40
|
+
:steroid => true,
|
41
|
+
:antivirals => true,
|
42
|
+
:fatigue => false,
|
43
|
+
:malaise => true,
|
44
|
+
:anorexia => true,
|
45
|
+
:liver_big => true,
|
46
|
+
:liver_firm => true,
|
47
|
+
:spleen_palpable => false,
|
48
|
+
:spiders => false,
|
49
|
+
:ascites => false,
|
50
|
+
:varices => true,
|
51
|
+
:bilirubin => 1.2,
|
52
|
+
:alkaline_phosphate => 100,
|
53
|
+
:sgot => 19,
|
54
|
+
:albumin => 3.1,
|
55
|
+
:protime => 42,
|
56
|
+
:histology => true,
|
57
|
+
}
|
58
|
+
],
|
59
|
+
[
|
60
|
+
records.size,
|
61
|
+
records[0].to_h,
|
62
|
+
records[-1].to_h,
|
63
|
+
])
|
64
|
+
end
|
65
|
+
|
66
|
+
sub_test_case("#metadata") do
|
67
|
+
test("#description") do
|
68
|
+
description = @dataset.metadata.description
|
69
|
+
assert do
|
70
|
+
description.start_with?("1. Title: Hepatitis Domain")
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-datasets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomisuker
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-02-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: csv
|
@@ -112,7 +112,7 @@ dependencies:
|
|
112
112
|
description: 'You can use datasets easily because you can access each dataset with
|
113
113
|
multiple ways such as `#each` and Apache Arrow Record Batch.
|
114
114
|
|
115
|
-
'
|
115
|
+
'
|
116
116
|
email:
|
117
117
|
- tomisuker16@gmail.com
|
118
118
|
- kou@clear-code.com
|
@@ -133,6 +133,7 @@ files:
|
|
133
133
|
- lib/datasets/dictionary.rb
|
134
134
|
- lib/datasets/downloader.rb
|
135
135
|
- lib/datasets/fashion-mnist.rb
|
136
|
+
- lib/datasets/hepatitis.rb
|
136
137
|
- lib/datasets/iris.rb
|
137
138
|
- lib/datasets/libsvm-dataset-list.rb
|
138
139
|
- lib/datasets/libsvm.rb
|
@@ -152,6 +153,7 @@ files:
|
|
152
153
|
- test/test-cifar.rb
|
153
154
|
- test/test-dictionary.rb
|
154
155
|
- test/test-fashion-mnist.rb
|
156
|
+
- test/test-hepatitis.rb
|
155
157
|
- test/test-iris.rb
|
156
158
|
- test/test-libsvm-dataset-list.rb
|
157
159
|
- test/test-libsvm.rb
|
@@ -187,19 +189,20 @@ signing_key:
|
|
187
189
|
specification_version: 4
|
188
190
|
summary: Red Datasets provides classes that provide common datasets such as iris dataset.
|
189
191
|
test_files:
|
190
|
-
- test/test-
|
191
|
-
- test/test-
|
192
|
+
- test/test-wine.rb
|
193
|
+
- test/test-iris.rb
|
192
194
|
- test/test-wikipedia.rb
|
193
|
-
- test/test-
|
195
|
+
- test/test-mnist.rb
|
194
196
|
- test/helper.rb
|
195
|
-
- test/test-iris.rb
|
196
|
-
- test/test-table.rb
|
197
|
-
- test/run-test.rb
|
198
|
-
- test/test-wine.rb
|
199
197
|
- test/test-penn-treebank.rb
|
200
|
-
- test/test
|
198
|
+
- test/run-test.rb
|
199
|
+
- test/test-table.rb
|
200
|
+
- test/test-fashion-mnist.rb
|
201
201
|
- test/test-cifar.rb
|
202
|
-
- test/test-mnist.rb
|
203
|
-
- test/test-mushroom.rb
|
204
202
|
- test/test-dictionary.rb
|
205
|
-
- test/test-
|
203
|
+
- test/test-mushroom.rb
|
204
|
+
- test/test-libsvm-dataset-list.rb
|
205
|
+
- test/test-hepatitis.rb
|
206
|
+
- test/test-adult.rb
|
207
|
+
- test/test-postal-code-japan.rb
|
208
|
+
- test/test-libsvm.rb
|