red-datasets 0.0.9 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 81ed53e83d75d517052aaf07c66fe177f12f986584141c951ac1dcfa2fc88646
4
- data.tar.gz: 94b9f3b8042eaad65304bf7c3d2fc35519f8328b0ca4e9f8a7ad9be13781a91e
3
+ metadata.gz: 120492172aae9cec1c4fc4f3b73575cb5349caf2f0b67d70676c8896324e1491
4
+ data.tar.gz: e46eb3f2875cb407e86cc0976eff7d612beb62ca6b421a51435b5d5e1bfa6e03
5
5
  SHA512:
6
- metadata.gz: c73561ed005e4b58f27fc6de969605a22d57adf4bc5b5184e5cdb65739f1ac6b86f6ed67794bfe61164859fc4a1b0f80430bc819b2ea37ac455a560a6f008b13
7
- data.tar.gz: 07560b09d68272dc7a959c16ec03975d1fa752f9d6930f0fd746c46e9236995606694f5899bad5bf770812c5a2d81e6f013353f680fc8adf65ad42bae514f57c
6
+ metadata.gz: 360bbf78c131f20a67359ddc2055cd58502da1f4e95adf30475cd405d5eb50be6ba4fd9aa0a0857226dc803e14282cc4231de113843e96657a65e287c7500137
7
+ data.tar.gz: f88ed1ae8c8f0dad9f4d8904a265c833ceee723ba92860c0e3bed4c193d56a901c31184abd4290058de47fbc089b12b4d3b1da064f138214e2954d45eee928da
@@ -1,5 +1,23 @@
1
1
  # News
2
2
 
3
+ ## 0.1.0 - 2020-02-04
4
+
5
+ ### Improvements
6
+
7
+ * Added support for Ruby 2.7.
8
+ [GitHub#82][GitHub#83][Patch by Yasuo Honda]
9
+
10
+ * `Datasets::Hepatitis`: Added.
11
+ [GitHub#70][Patch by KazuhiroYoshimoto]
12
+
13
+ * `Datasets::Downloader`: Added support for query.
14
+
15
+ ### Thanks
16
+
17
+ * Yasuo Honda
18
+
19
+ * KazuhiroYoshimoto
20
+
3
21
  ## 0.0.9 - 2019-09-09
4
22
 
5
23
  ### Improvements
@@ -3,6 +3,7 @@ require_relative "datasets/version"
3
3
  require_relative "datasets/adult"
4
4
  require_relative "datasets/cifar"
5
5
  require_relative "datasets/fashion-mnist"
6
+ require_relative "datasets/hepatitis"
6
7
  require_relative "datasets/iris"
7
8
  require_relative "datasets/libsvm"
8
9
  require_relative "datasets/libsvm-dataset-list"
@@ -62,11 +62,12 @@ module Datasets
62
62
  data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.#{ext}"
63
63
  download(data_path, data_url)
64
64
  end
65
- CSV.open(data_path,
66
- {
65
+
66
+ options = {
67
67
  converters: [:numeric, lambda {|f| f.strip}],
68
68
  skip_lines: /\A\|/,
69
- }) do |csv|
69
+ }
70
+ CSV.open(data_path, **options) do |csv|
70
71
  yield(csv)
71
72
  end
72
73
  end
@@ -34,7 +34,9 @@ module Datasets
34
34
  Net::HTTP.start(@url.hostname,
35
35
  @url.port,
36
36
  :use_ssl => (@url.scheme == "https")) do |http|
37
- request = Net::HTTP::Get.new(@url.path, headers)
37
+ path = @url.path
38
+ path += "?#{@url.query}" if @url.query
39
+ request = Net::HTTP::Get.new(path, headers)
38
40
  http.request(request) do |response|
39
41
  case response
40
42
  when Net::HTTPPartialContent
@@ -0,0 +1,207 @@
1
+ require "csv"
2
+
3
+ require_relative "dataset"
4
+
5
+ module Datasets
6
+ class Hepatitis < Dataset
7
+ class Record < Struct.new(:label,
8
+ :age,
9
+ :sex,
10
+ :steroid,
11
+ :antivirals,
12
+ :fatigue,
13
+ :malaise,
14
+ :anorexia,
15
+ :liver_big,
16
+ :liver_firm,
17
+ :spleen_palpable,
18
+ :spiders,
19
+ :ascites,
20
+ :varices,
21
+ :bilirubin,
22
+ :alkaline_phosphate,
23
+ :sgot,
24
+ :albumin,
25
+ :protime,
26
+ :histology)
27
+ def initialize(*values)
28
+ super()
29
+ members.zip(values) do |member, value|
30
+ __send__("#{member}=", value)
31
+ end
32
+ end
33
+
34
+ def label=(label)
35
+ case label
36
+ when "1"
37
+ super(:die)
38
+ when "2"
39
+ super(:live)
40
+ else
41
+ super(label)
42
+ end
43
+ end
44
+
45
+ def age=(age)
46
+ super(normalize_integer(age))
47
+ end
48
+
49
+ def sex=(sex)
50
+ case sex
51
+ when "1"
52
+ super(:male)
53
+ when "2"
54
+ super(:female)
55
+ else
56
+ super(sex)
57
+ end
58
+ end
59
+
60
+ def steroid=(steroid)
61
+ super(normalize_boolean(steroid))
62
+ end
63
+
64
+ def antivirals=(antivirals)
65
+ super(normalize_boolean(antivirals))
66
+ end
67
+
68
+ def fatigue=(fatigue)
69
+ super(normalize_boolean(fatigue))
70
+ end
71
+
72
+ def malaise=(malaise)
73
+ super(normalize_boolean(malaise))
74
+ end
75
+
76
+ def anorexia=(anorexia)
77
+ super(normalize_boolean(anorexia))
78
+ end
79
+
80
+ def liver_big=(liver_big)
81
+ super(normalize_boolean(liver_big))
82
+ end
83
+
84
+ def liver_firm=(liver_firm)
85
+ super(normalize_boolean(liver_firm))
86
+ end
87
+
88
+ def spleen_palpable=(spleen_palpable)
89
+ super(normalize_boolean(spleen_palpable))
90
+ end
91
+
92
+ def spiders=(spiders)
93
+ super(normalize_boolean(spiders))
94
+ end
95
+
96
+ def ascites=(ascites)
97
+ super(normalize_boolean(ascites))
98
+ end
99
+
100
+ def varices=(varices)
101
+ super(normalize_boolean(varices))
102
+ end
103
+
104
+ def bilirubin=(bilirubin)
105
+ super(normalize_float(bilirubin))
106
+ end
107
+
108
+ def alkaline_phosphate=(alkaline_phosphate)
109
+ super(normalize_integer(alkaline_phosphate))
110
+ end
111
+
112
+ def sgot=(sgot)
113
+ super(normalize_integer(sgot))
114
+ end
115
+
116
+ def albumin=(albumin)
117
+ super(normalize_float(albumin))
118
+ end
119
+
120
+ def protime=(protime)
121
+ super(normalize_integer(protime))
122
+ end
123
+
124
+ def histology=(histology)
125
+ super(normalize_boolean(histology))
126
+ end
127
+
128
+ private
129
+ def normalize_boolean(value)
130
+ case value
131
+ when "?"
132
+ nil
133
+ when "1"
134
+ false
135
+ when "2"
136
+ true
137
+ else
138
+ value
139
+ end
140
+ end
141
+
142
+ def normalize_float(value)
143
+ case value
144
+ when "?"
145
+ nil
146
+ else
147
+ Float(value)
148
+ end
149
+ end
150
+
151
+ def normalize_integer(value)
152
+ case value
153
+ when "?"
154
+ nil
155
+ else
156
+ Integer(value, 10)
157
+ end
158
+ end
159
+ end
160
+
161
+ def initialize
162
+ super()
163
+ @metadata.id = "hepatitis"
164
+ @metadata.name = "Hepatitis"
165
+ @metadata.url = "https://archive.ics.uci.edu/ml/datasets/hepatitis"
166
+ @metadata.description = lambda do
167
+ read_names
168
+ end
169
+ end
170
+
171
+ def each
172
+ return to_enum(__method__) unless block_given?
173
+
174
+ open_data do |csv|
175
+ csv.each do |row|
176
+ record = Record.new(*row)
177
+ yield(record)
178
+ end
179
+ end
180
+ end
181
+
182
+ private
183
+ def base_url
184
+ "https://archive.ics.uci.edu/ml/machine-learning-databases/hepatitis"
185
+ end
186
+
187
+ def open_data
188
+ data_path = cache_dir_path + "hepatitis.csv"
189
+ unless data_path.exist?
190
+ data_url = "#{base_url}/hepatitis.data"
191
+ download(data_path, data_url)
192
+ end
193
+ CSV.open(data_path) do |csv|
194
+ yield(csv)
195
+ end
196
+ end
197
+
198
+ def read_names
199
+ names_path = cache_dir_path + "hepatitis.names"
200
+ unless names_path.exist?
201
+ names_url = "#{base_url}/hepatitis.names"
202
+ download(names_path, names_url)
203
+ end
204
+ names_path.read
205
+ end
206
+ end
207
+ end
@@ -1,3 +1,3 @@
1
1
  module Datasets
2
- VERSION = "0.0.9"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -0,0 +1,74 @@
1
+ class HepatitisTest < Test::Unit::TestCase
2
+ def setup
3
+ @dataset = Datasets::Hepatitis.new
4
+ end
5
+
6
+ def record(*args)
7
+ Datasets::Hepatitis::Record.new(*args)
8
+ end
9
+
10
+ test("#each") do
11
+ records = @dataset.each.to_a
12
+ assert_equal([
13
+ 155,
14
+ {
15
+ :label => :live,
16
+ :age => 30,
17
+ :sex => :female,
18
+ :steroid => false,
19
+ :antivirals => true,
20
+ :fatigue => true,
21
+ :malaise => true,
22
+ :anorexia => true,
23
+ :liver_big => false,
24
+ :liver_firm => true,
25
+ :spleen_palpable => true,
26
+ :spiders => true,
27
+ :ascites => true,
28
+ :varices => true,
29
+ :bilirubin => 1.0,
30
+ :alkaline_phosphate => 85,
31
+ :sgot => 18,
32
+ :albumin => 4.0,
33
+ :protime => nil,
34
+ :histology => false,
35
+ },
36
+ {
37
+ :label => :die,
38
+ :age => 43,
39
+ :sex => :male,
40
+ :steroid => true,
41
+ :antivirals => true,
42
+ :fatigue => false,
43
+ :malaise => true,
44
+ :anorexia => true,
45
+ :liver_big => true,
46
+ :liver_firm => true,
47
+ :spleen_palpable => false,
48
+ :spiders => false,
49
+ :ascites => false,
50
+ :varices => true,
51
+ :bilirubin => 1.2,
52
+ :alkaline_phosphate => 100,
53
+ :sgot => 19,
54
+ :albumin => 3.1,
55
+ :protime => 42,
56
+ :histology => true,
57
+ }
58
+ ],
59
+ [
60
+ records.size,
61
+ records[0].to_h,
62
+ records[-1].to_h,
63
+ ])
64
+ end
65
+
66
+ sub_test_case("#metadata") do
67
+ test("#description") do
68
+ description = @dataset.metadata.description
69
+ assert do
70
+ description.start_with?("1. Title: Hepatitis Domain")
71
+ end
72
+ end
73
+ end
74
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-09-09 00:00:00.000000000 Z
12
+ date: 2020-02-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: csv
@@ -112,7 +112,7 @@ dependencies:
112
112
  description: 'You can use datasets easily because you can access each dataset with
113
113
  multiple ways such as `#each` and Apache Arrow Record Batch.
114
114
 
115
- '
115
+ '
116
116
  email:
117
117
  - tomisuker16@gmail.com
118
118
  - kou@clear-code.com
@@ -133,6 +133,7 @@ files:
133
133
  - lib/datasets/dictionary.rb
134
134
  - lib/datasets/downloader.rb
135
135
  - lib/datasets/fashion-mnist.rb
136
+ - lib/datasets/hepatitis.rb
136
137
  - lib/datasets/iris.rb
137
138
  - lib/datasets/libsvm-dataset-list.rb
138
139
  - lib/datasets/libsvm.rb
@@ -152,6 +153,7 @@ files:
152
153
  - test/test-cifar.rb
153
154
  - test/test-dictionary.rb
154
155
  - test/test-fashion-mnist.rb
156
+ - test/test-hepatitis.rb
155
157
  - test/test-iris.rb
156
158
  - test/test-libsvm-dataset-list.rb
157
159
  - test/test-libsvm.rb
@@ -187,19 +189,20 @@ signing_key:
187
189
  specification_version: 4
188
190
  summary: Red Datasets provides classes that provide common datasets such as iris dataset.
189
191
  test_files:
190
- - test/test-adult.rb
191
- - test/test-libsvm.rb
192
+ - test/test-wine.rb
193
+ - test/test-iris.rb
192
194
  - test/test-wikipedia.rb
193
- - test/test-libsvm-dataset-list.rb
195
+ - test/test-mnist.rb
194
196
  - test/helper.rb
195
- - test/test-iris.rb
196
- - test/test-table.rb
197
- - test/run-test.rb
198
- - test/test-wine.rb
199
197
  - test/test-penn-treebank.rb
200
- - test/test-postal-code-japan.rb
198
+ - test/run-test.rb
199
+ - test/test-table.rb
200
+ - test/test-fashion-mnist.rb
201
201
  - test/test-cifar.rb
202
- - test/test-mnist.rb
203
- - test/test-mushroom.rb
204
202
  - test/test-dictionary.rb
205
- - test/test-fashion-mnist.rb
203
+ - test/test-mushroom.rb
204
+ - test/test-libsvm-dataset-list.rb
205
+ - test/test-hepatitis.rb
206
+ - test/test-adult.rb
207
+ - test/test-postal-code-japan.rb
208
+ - test/test-libsvm.rb