red-datasets 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/text/news.md +18 -0
- data/lib/datasets.rb +1 -0
- data/lib/datasets/adult.rb +4 -3
- data/lib/datasets/downloader.rb +3 -1
- data/lib/datasets/hepatitis.rb +207 -0
- data/lib/datasets/version.rb +1 -1
- data/test/test-hepatitis.rb +74 -0
- metadata +17 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 120492172aae9cec1c4fc4f3b73575cb5349caf2f0b67d70676c8896324e1491
|
4
|
+
data.tar.gz: e46eb3f2875cb407e86cc0976eff7d612beb62ca6b421a51435b5d5e1bfa6e03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 360bbf78c131f20a67359ddc2055cd58502da1f4e95adf30475cd405d5eb50be6ba4fd9aa0a0857226dc803e14282cc4231de113843e96657a65e287c7500137
|
7
|
+
data.tar.gz: f88ed1ae8c8f0dad9f4d8904a265c833ceee723ba92860c0e3bed4c193d56a901c31184abd4290058de47fbc089b12b4d3b1da064f138214e2954d45eee928da
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,23 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.1.0 - 2020-02-04
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for Ruby 2.7.
|
8
|
+
[GitHub#82][GitHub#83][Patch by Yasuo Honda]
|
9
|
+
|
10
|
+
* `Datasets::Hepatitis`: Added.
|
11
|
+
[GitHub#70][Patch by KazuhiroYoshimoto]
|
12
|
+
|
13
|
+
* `Datasets::Downloader`: Added support for query.
|
14
|
+
|
15
|
+
### Thanks
|
16
|
+
|
17
|
+
* Yasuo Honda
|
18
|
+
|
19
|
+
* KazuhiroYoshimoto
|
20
|
+
|
3
21
|
## 0.0.9 - 2019-09-09
|
4
22
|
|
5
23
|
### Improvements
|
data/lib/datasets.rb
CHANGED
@@ -3,6 +3,7 @@ require_relative "datasets/version"
|
|
3
3
|
require_relative "datasets/adult"
|
4
4
|
require_relative "datasets/cifar"
|
5
5
|
require_relative "datasets/fashion-mnist"
|
6
|
+
require_relative "datasets/hepatitis"
|
6
7
|
require_relative "datasets/iris"
|
7
8
|
require_relative "datasets/libsvm"
|
8
9
|
require_relative "datasets/libsvm-dataset-list"
|
data/lib/datasets/adult.rb
CHANGED
@@ -62,11 +62,12 @@ module Datasets
|
|
62
62
|
data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.#{ext}"
|
63
63
|
download(data_path, data_url)
|
64
64
|
end
|
65
|
-
|
66
|
-
|
65
|
+
|
66
|
+
options = {
|
67
67
|
converters: [:numeric, lambda {|f| f.strip}],
|
68
68
|
skip_lines: /\A\|/,
|
69
|
-
|
69
|
+
}
|
70
|
+
CSV.open(data_path, **options) do |csv|
|
70
71
|
yield(csv)
|
71
72
|
end
|
72
73
|
end
|
data/lib/datasets/downloader.rb
CHANGED
@@ -34,7 +34,9 @@ module Datasets
|
|
34
34
|
Net::HTTP.start(@url.hostname,
|
35
35
|
@url.port,
|
36
36
|
:use_ssl => (@url.scheme == "https")) do |http|
|
37
|
-
|
37
|
+
path = @url.path
|
38
|
+
path += "?#{@url.query}" if @url.query
|
39
|
+
request = Net::HTTP::Get.new(path, headers)
|
38
40
|
http.request(request) do |response|
|
39
41
|
case response
|
40
42
|
when Net::HTTPPartialContent
|
data/lib/datasets/hepatitis.rb
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
require "csv"
|
2
|
+
|
3
|
+
require_relative "dataset"
|
4
|
+
|
5
|
+
module Datasets
|
6
|
+
class Hepatitis < Dataset
|
7
|
+
class Record < Struct.new(:label,
|
8
|
+
:age,
|
9
|
+
:sex,
|
10
|
+
:steroid,
|
11
|
+
:antivirals,
|
12
|
+
:fatigue,
|
13
|
+
:malaise,
|
14
|
+
:anorexia,
|
15
|
+
:liver_big,
|
16
|
+
:liver_firm,
|
17
|
+
:spleen_palpable,
|
18
|
+
:spiders,
|
19
|
+
:ascites,
|
20
|
+
:varices,
|
21
|
+
:bilirubin,
|
22
|
+
:alkaline_phosphate,
|
23
|
+
:sgot,
|
24
|
+
:albumin,
|
25
|
+
:protime,
|
26
|
+
:histology)
|
27
|
+
def initialize(*values)
|
28
|
+
super()
|
29
|
+
members.zip(values) do |member, value|
|
30
|
+
__send__("#{member}=", value)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def label=(label)
|
35
|
+
case label
|
36
|
+
when "1"
|
37
|
+
super(:die)
|
38
|
+
when "2"
|
39
|
+
super(:live)
|
40
|
+
else
|
41
|
+
super(label)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def age=(age)
|
46
|
+
super(normalize_integer(age))
|
47
|
+
end
|
48
|
+
|
49
|
+
def sex=(sex)
|
50
|
+
case sex
|
51
|
+
when "1"
|
52
|
+
super(:male)
|
53
|
+
when "2"
|
54
|
+
super(:female)
|
55
|
+
else
|
56
|
+
super(sex)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def steroid=(steroid)
|
61
|
+
super(normalize_boolean(steroid))
|
62
|
+
end
|
63
|
+
|
64
|
+
def antivirals=(antivirals)
|
65
|
+
super(normalize_boolean(antivirals))
|
66
|
+
end
|
67
|
+
|
68
|
+
def fatigue=(fatigue)
|
69
|
+
super(normalize_boolean(fatigue))
|
70
|
+
end
|
71
|
+
|
72
|
+
def malaise=(malaise)
|
73
|
+
super(normalize_boolean(malaise))
|
74
|
+
end
|
75
|
+
|
76
|
+
def anorexia=(anorexia)
|
77
|
+
super(normalize_boolean(anorexia))
|
78
|
+
end
|
79
|
+
|
80
|
+
def liver_big=(liver_big)
|
81
|
+
super(normalize_boolean(liver_big))
|
82
|
+
end
|
83
|
+
|
84
|
+
def liver_firm=(liver_firm)
|
85
|
+
super(normalize_boolean(liver_firm))
|
86
|
+
end
|
87
|
+
|
88
|
+
def spleen_palpable=(spleen_palpable)
|
89
|
+
super(normalize_boolean(spleen_palpable))
|
90
|
+
end
|
91
|
+
|
92
|
+
def spiders=(spiders)
|
93
|
+
super(normalize_boolean(spiders))
|
94
|
+
end
|
95
|
+
|
96
|
+
def ascites=(ascites)
|
97
|
+
super(normalize_boolean(ascites))
|
98
|
+
end
|
99
|
+
|
100
|
+
def varices=(varices)
|
101
|
+
super(normalize_boolean(varices))
|
102
|
+
end
|
103
|
+
|
104
|
+
def bilirubin=(bilirubin)
|
105
|
+
super(normalize_float(bilirubin))
|
106
|
+
end
|
107
|
+
|
108
|
+
def alkaline_phosphate=(alkaline_phosphate)
|
109
|
+
super(normalize_integer(alkaline_phosphate))
|
110
|
+
end
|
111
|
+
|
112
|
+
def sgot=(sgot)
|
113
|
+
super(normalize_integer(sgot))
|
114
|
+
end
|
115
|
+
|
116
|
+
def albumin=(albumin)
|
117
|
+
super(normalize_float(albumin))
|
118
|
+
end
|
119
|
+
|
120
|
+
def protime=(protime)
|
121
|
+
super(normalize_integer(protime))
|
122
|
+
end
|
123
|
+
|
124
|
+
def histology=(histology)
|
125
|
+
super(normalize_boolean(histology))
|
126
|
+
end
|
127
|
+
|
128
|
+
private
|
129
|
+
def normalize_boolean(value)
|
130
|
+
case value
|
131
|
+
when "?"
|
132
|
+
nil
|
133
|
+
when "1"
|
134
|
+
false
|
135
|
+
when "2"
|
136
|
+
true
|
137
|
+
else
|
138
|
+
value
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def normalize_float(value)
|
143
|
+
case value
|
144
|
+
when "?"
|
145
|
+
nil
|
146
|
+
else
|
147
|
+
Float(value)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def normalize_integer(value)
|
152
|
+
case value
|
153
|
+
when "?"
|
154
|
+
nil
|
155
|
+
else
|
156
|
+
Integer(value, 10)
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
def initialize
|
162
|
+
super()
|
163
|
+
@metadata.id = "hepatitis"
|
164
|
+
@metadata.name = "Hepatitis"
|
165
|
+
@metadata.url = "https://archive.ics.uci.edu/ml/datasets/hepatitis"
|
166
|
+
@metadata.description = lambda do
|
167
|
+
read_names
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def each
|
172
|
+
return to_enum(__method__) unless block_given?
|
173
|
+
|
174
|
+
open_data do |csv|
|
175
|
+
csv.each do |row|
|
176
|
+
record = Record.new(*row)
|
177
|
+
yield(record)
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
private
|
183
|
+
def base_url
|
184
|
+
"https://archive.ics.uci.edu/ml/machine-learning-databases/hepatitis"
|
185
|
+
end
|
186
|
+
|
187
|
+
def open_data
|
188
|
+
data_path = cache_dir_path + "hepatitis.csv"
|
189
|
+
unless data_path.exist?
|
190
|
+
data_url = "#{base_url}/hepatitis.data"
|
191
|
+
download(data_path, data_url)
|
192
|
+
end
|
193
|
+
CSV.open(data_path) do |csv|
|
194
|
+
yield(csv)
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
def read_names
|
199
|
+
names_path = cache_dir_path + "hepatitis.names"
|
200
|
+
unless names_path.exist?
|
201
|
+
names_url = "#{base_url}/hepatitis.names"
|
202
|
+
download(names_path, names_url)
|
203
|
+
end
|
204
|
+
names_path.read
|
205
|
+
end
|
206
|
+
end
|
207
|
+
end
|
data/lib/datasets/version.rb
CHANGED
data/test/test-hepatitis.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
class HepatitisTest < Test::Unit::TestCase
|
2
|
+
def setup
|
3
|
+
@dataset = Datasets::Hepatitis.new
|
4
|
+
end
|
5
|
+
|
6
|
+
def record(*args)
|
7
|
+
Datasets::Hepatitis::Record.new(*args)
|
8
|
+
end
|
9
|
+
|
10
|
+
test("#each") do
|
11
|
+
records = @dataset.each.to_a
|
12
|
+
assert_equal([
|
13
|
+
155,
|
14
|
+
{
|
15
|
+
:label => :live,
|
16
|
+
:age => 30,
|
17
|
+
:sex => :female,
|
18
|
+
:steroid => false,
|
19
|
+
:antivirals => true,
|
20
|
+
:fatigue => true,
|
21
|
+
:malaise => true,
|
22
|
+
:anorexia => true,
|
23
|
+
:liver_big => false,
|
24
|
+
:liver_firm => true,
|
25
|
+
:spleen_palpable => true,
|
26
|
+
:spiders => true,
|
27
|
+
:ascites => true,
|
28
|
+
:varices => true,
|
29
|
+
:bilirubin => 1.0,
|
30
|
+
:alkaline_phosphate => 85,
|
31
|
+
:sgot => 18,
|
32
|
+
:albumin => 4.0,
|
33
|
+
:protime => nil,
|
34
|
+
:histology => false,
|
35
|
+
},
|
36
|
+
{
|
37
|
+
:label => :die,
|
38
|
+
:age => 43,
|
39
|
+
:sex => :male,
|
40
|
+
:steroid => true,
|
41
|
+
:antivirals => true,
|
42
|
+
:fatigue => false,
|
43
|
+
:malaise => true,
|
44
|
+
:anorexia => true,
|
45
|
+
:liver_big => true,
|
46
|
+
:liver_firm => true,
|
47
|
+
:spleen_palpable => false,
|
48
|
+
:spiders => false,
|
49
|
+
:ascites => false,
|
50
|
+
:varices => true,
|
51
|
+
:bilirubin => 1.2,
|
52
|
+
:alkaline_phosphate => 100,
|
53
|
+
:sgot => 19,
|
54
|
+
:albumin => 3.1,
|
55
|
+
:protime => 42,
|
56
|
+
:histology => true,
|
57
|
+
}
|
58
|
+
],
|
59
|
+
[
|
60
|
+
records.size,
|
61
|
+
records[0].to_h,
|
62
|
+
records[-1].to_h,
|
63
|
+
])
|
64
|
+
end
|
65
|
+
|
66
|
+
sub_test_case("#metadata") do
|
67
|
+
test("#description") do
|
68
|
+
description = @dataset.metadata.description
|
69
|
+
assert do
|
70
|
+
description.start_with?("1. Title: Hepatitis Domain")
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-datasets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomisuker
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-02-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: csv
|
@@ -112,7 +112,7 @@ dependencies:
|
|
112
112
|
description: 'You can use datasets easily because you can access each dataset with
|
113
113
|
multiple ways such as `#each` and Apache Arrow Record Batch.
|
114
114
|
|
115
|
-
'
|
115
|
+
'
|
116
116
|
email:
|
117
117
|
- tomisuker16@gmail.com
|
118
118
|
- kou@clear-code.com
|
@@ -133,6 +133,7 @@ files:
|
|
133
133
|
- lib/datasets/dictionary.rb
|
134
134
|
- lib/datasets/downloader.rb
|
135
135
|
- lib/datasets/fashion-mnist.rb
|
136
|
+
- lib/datasets/hepatitis.rb
|
136
137
|
- lib/datasets/iris.rb
|
137
138
|
- lib/datasets/libsvm-dataset-list.rb
|
138
139
|
- lib/datasets/libsvm.rb
|
@@ -152,6 +153,7 @@ files:
|
|
152
153
|
- test/test-cifar.rb
|
153
154
|
- test/test-dictionary.rb
|
154
155
|
- test/test-fashion-mnist.rb
|
156
|
+
- test/test-hepatitis.rb
|
155
157
|
- test/test-iris.rb
|
156
158
|
- test/test-libsvm-dataset-list.rb
|
157
159
|
- test/test-libsvm.rb
|
@@ -187,19 +189,20 @@ signing_key:
|
|
187
189
|
specification_version: 4
|
188
190
|
summary: Red Datasets provides classes that provide common datasets such as iris dataset.
|
189
191
|
test_files:
|
190
|
-
- test/test-
|
191
|
-
- test/test-
|
192
|
+
- test/test-wine.rb
|
193
|
+
- test/test-iris.rb
|
192
194
|
- test/test-wikipedia.rb
|
193
|
-
- test/test-
|
195
|
+
- test/test-mnist.rb
|
194
196
|
- test/helper.rb
|
195
|
-
- test/test-iris.rb
|
196
|
-
- test/test-table.rb
|
197
|
-
- test/run-test.rb
|
198
|
-
- test/test-wine.rb
|
199
197
|
- test/test-penn-treebank.rb
|
200
|
-
- test/test
|
198
|
+
- test/run-test.rb
|
199
|
+
- test/test-table.rb
|
200
|
+
- test/test-fashion-mnist.rb
|
201
201
|
- test/test-cifar.rb
|
202
|
-
- test/test-mnist.rb
|
203
|
-
- test/test-mushroom.rb
|
204
202
|
- test/test-dictionary.rb
|
205
|
-
- test/test-
|
203
|
+
- test/test-mushroom.rb
|
204
|
+
- test/test-libsvm-dataset-list.rb
|
205
|
+
- test/test-hepatitis.rb
|
206
|
+
- test/test-adult.rb
|
207
|
+
- test/test-postal-code-japan.rb
|
208
|
+
- test/test-libsvm.rb
|