red-datasets 0.0.9 → 0.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 81ed53e83d75d517052aaf07c66fe177f12f986584141c951ac1dcfa2fc88646
4
- data.tar.gz: 94b9f3b8042eaad65304bf7c3d2fc35519f8328b0ca4e9f8a7ad9be13781a91e
3
+ metadata.gz: 120492172aae9cec1c4fc4f3b73575cb5349caf2f0b67d70676c8896324e1491
4
+ data.tar.gz: e46eb3f2875cb407e86cc0976eff7d612beb62ca6b421a51435b5d5e1bfa6e03
5
5
  SHA512:
6
- metadata.gz: c73561ed005e4b58f27fc6de969605a22d57adf4bc5b5184e5cdb65739f1ac6b86f6ed67794bfe61164859fc4a1b0f80430bc819b2ea37ac455a560a6f008b13
7
- data.tar.gz: 07560b09d68272dc7a959c16ec03975d1fa752f9d6930f0fd746c46e9236995606694f5899bad5bf770812c5a2d81e6f013353f680fc8adf65ad42bae514f57c
6
+ metadata.gz: 360bbf78c131f20a67359ddc2055cd58502da1f4e95adf30475cd405d5eb50be6ba4fd9aa0a0857226dc803e14282cc4231de113843e96657a65e287c7500137
7
+ data.tar.gz: f88ed1ae8c8f0dad9f4d8904a265c833ceee723ba92860c0e3bed4c193d56a901c31184abd4290058de47fbc089b12b4d3b1da064f138214e2954d45eee928da
@@ -1,5 +1,23 @@
1
1
  # News
2
2
 
3
+ ## 0.1.0 - 2020-02-04
4
+
5
+ ### Improvements
6
+
7
+ * Added support for Ruby 2.7.
8
+ [GitHub#82][GitHub#83][Patch by Yasuo Honda]
9
+
10
+ * `Datasets::Hepatitis`: Added.
11
+ [GitHub#70][Patch by KazuhiroYoshimoto]
12
+
13
+ * `Datasets::Downloader`: Added support for URLs that include a query string.
14
+
15
+ ### Thanks
16
+
17
+ * Yasuo Honda
18
+
19
+ * KazuhiroYoshimoto
20
+
3
21
  ## 0.0.9 - 2019-09-09
4
22
 
5
23
  ### Improvements
@@ -3,6 +3,7 @@ require_relative "datasets/version"
3
3
  require_relative "datasets/adult"
4
4
  require_relative "datasets/cifar"
5
5
  require_relative "datasets/fashion-mnist"
6
+ require_relative "datasets/hepatitis"
6
7
  require_relative "datasets/iris"
7
8
  require_relative "datasets/libsvm"
8
9
  require_relative "datasets/libsvm-dataset-list"
@@ -62,11 +62,12 @@ module Datasets
62
62
  data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.#{ext}"
63
63
  download(data_path, data_url)
64
64
  end
65
- CSV.open(data_path,
66
- {
65
+
66
+ options = {
67
67
  converters: [:numeric, lambda {|f| f.strip}],
68
68
  skip_lines: /\A\|/,
69
- }) do |csv|
69
+ }
70
+ CSV.open(data_path, **options) do |csv|
70
71
  yield(csv)
71
72
  end
72
73
  end
@@ -34,7 +34,9 @@ module Datasets
34
34
  Net::HTTP.start(@url.hostname,
35
35
  @url.port,
36
36
  :use_ssl => (@url.scheme == "https")) do |http|
37
- request = Net::HTTP::Get.new(@url.path, headers)
37
+ path = @url.path
38
+ path += "?#{@url.query}" if @url.query
39
+ request = Net::HTTP::Get.new(path, headers)
38
40
  http.request(request) do |response|
39
41
  case response
40
42
  when Net::HTTPPartialContent
@@ -0,0 +1,207 @@
1
+ require "csv"
2
+
3
+ require_relative "dataset"
4
+
5
+ module Datasets
6
+ class Hepatitis < Dataset
7
# One row of the Hepatitis dataset.
#
# The attribute writers convert raw field values to Ruby values:
#
# * +label+: "1" => :die, "2" => :live
# * +sex+: "1" => :male, "2" => :female
# * boolean fields: "1" => false, "2" => true, "?" => nil
# * integer fields: parsed with Integer(value, 10), "?" => nil
# * float fields: parsed with Float(value), "?" => nil
#
# Values that are already converted (and nil) are stored as they are, so
# a record can also be built from typed values, from a short row or from
# no values at all. Malformed numeric strings still raise ArgumentError.
class Record < Struct.new(:label,
                          :age,
                          :sex,
                          :steroid,
                          :antivirals,
                          :fatigue,
                          :malaise,
                          :anorexia,
                          :liver_big,
                          :liver_firm,
                          :spleen_palpable,
                          :spiders,
                          :ascites,
                          :varices,
                          :bilirubin,
                          :alkaline_phosphate,
                          :sgot,
                          :albumin,
                          :protime,
                          :histology)
  LABELS = {"1" => :die, "2" => :live}.freeze
  SEXES = {"1" => :male, "2" => :female}.freeze
  BOOLEANS = {"?" => nil, "1" => false, "2" => true}.freeze

  BOOLEAN_FIELDS = [
    :steroid,
    :antivirals,
    :fatigue,
    :malaise,
    :anorexia,
    :liver_big,
    :liver_firm,
    :spleen_palpable,
    :spiders,
    :ascites,
    :varices,
    :histology,
  ].freeze
  INTEGER_FIELDS = [:age, :alkaline_phosphate, :sgot, :protime].freeze
  FLOAT_FIELDS = [:bilirubin, :albumin].freeze

  private_constant :LABELS, :SEXES, :BOOLEANS
  private_constant :BOOLEAN_FIELDS, :INTEGER_FIELDS, :FLOAT_FIELDS

  # @param values [Array] field values in member order. Missing trailing
  #   values are treated as nil; values beyond the last member are ignored.
  def initialize(*values)
    super()
    # Go through the writers so that every value is normalized.
    members.zip(values) do |member, value|
      __send__("#{member}=", value)
    end
  end

  # Stores :die for "1" and :live for "2"; any other value is kept as is.
  def label=(label)
    super(LABELS.fetch(label) {label})
  end

  # Stores :male for "1" and :female for "2"; any other value is kept as is.
  def sex=(sex)
    super(SEXES.fetch(sex) {sex})
  end

  # The writers below only differ in the normalizer they apply.
  # super needs explicit arguments inside define_method.
  BOOLEAN_FIELDS.each do |field|
    define_method("#{field}=") do |value|
      super(normalize_boolean(value))
    end
  end

  INTEGER_FIELDS.each do |field|
    define_method("#{field}=") do |value|
      super(normalize_integer(value))
    end
  end

  FLOAT_FIELDS.each do |field|
    define_method("#{field}=") do |value|
      super(normalize_float(value))
    end
  end

  private
  # "?" => nil, "1" => false, "2" => true; anything else is returned as is.
  def normalize_boolean(value)
    BOOLEANS.fetch(value) {value}
  end

  # Returns nil for nil and "?", otherwise Float(value).
  #
  # @raise [ArgumentError] if value is a string that is not a valid float.
  def normalize_float(value)
    case value
    when nil, "?"
      nil
    else
      Float(value)
    end
  end

  # Returns nil for nil and "?", an Integer unchanged, otherwise the
  # value parsed as a base 10 integer.
  #
  # @raise [ArgumentError] if value is a string that is not a valid
  #   base 10 integer.
  def normalize_integer(value)
    case value
    when nil, "?"
      nil
    when Integer
      # Integer(value, 10) rejects non string values.
      value
    else
      Integer(value, 10)
    end
  end
end
160
+
161
# Fills in the dataset metadata. The description is assigned as a
# lambda that calls read_names, not as a string.
def initialize
  super()
  @metadata.id = "hepatitis"
  @metadata.name = "Hepatitis"
  @metadata.url = "https://archive.ics.uci.edu/ml/datasets/hepatitis"
  @metadata.description = lambda {read_names}
end
170
+
171
# Yields one Record per row provided by open_data.
#
# @yieldparam record [Record] the record built from the row's fields.
# @return [Enumerator] when no block is given.
def each
  return to_enum(__method__) unless block_given?

  open_data do |csv|
    csv.each {|fields| yield(Record.new(*fields))}
  end
end
181
+
182
+ private
183
# URL prefix used to build the URLs of the data and names files.
#
# @return [String]
def base_url
  "https://archive.ics.uci.edu/ml/machine-learning-databases/hepatitis"
end
186
+
187
# Opens the cached data file as CSV and yields the reader, downloading
# the file first when it is not in the cache directory yet.
#
# @yieldparam csv [CSV] reader opened on the cached data file.
def open_data
  csv_path = cache_dir_path + "hepatitis.csv"
  download(csv_path, "#{base_url}/hepatitis.data") unless csv_path.exist?
  CSV.open(csv_path) {|csv| yield(csv)}
end
197
+
198
# Returns the content of the cached names file, downloading the file
# first when it is not in the cache directory yet.
def read_names
  cached_path = cache_dir_path + "hepatitis.names"
  download(cached_path, "#{base_url}/hepatitis.names") unless cached_path.exist?
  cached_path.read
end
206
+ end
207
+ end
@@ -1,3 +1,3 @@
1
1
  module Datasets
2
- VERSION = "0.0.9"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -0,0 +1,74 @@
1
# Tests for Datasets::Hepatitis.
class HepatitisTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::Hepatitis.new
  end

  # Shortcut for Datasets::Hepatitis::Record.new.
  def record(*args)
    Datasets::Hepatitis::Record.new(*args)
  end

  test("#each") do
    # Checks the number of records and the first and last records.
    first_record = {
      label: :live,
      age: 30,
      sex: :female,
      steroid: false,
      antivirals: true,
      fatigue: true,
      malaise: true,
      anorexia: true,
      liver_big: false,
      liver_firm: true,
      spleen_palpable: true,
      spiders: true,
      ascites: true,
      varices: true,
      bilirubin: 1.0,
      alkaline_phosphate: 85,
      sgot: 18,
      albumin: 4.0,
      protime: nil,
      histology: false,
    }
    last_record = {
      label: :die,
      age: 43,
      sex: :male,
      steroid: true,
      antivirals: true,
      fatigue: false,
      malaise: true,
      anorexia: true,
      liver_big: true,
      liver_firm: true,
      spleen_palpable: false,
      spiders: false,
      ascites: false,
      varices: true,
      bilirubin: 1.2,
      alkaline_phosphate: 100,
      sgot: 19,
      albumin: 3.1,
      protime: 42,
      histology: true,
    }
    records = @dataset.each.to_a
    assert_equal([155, first_record, last_record],
                 [records.size, records[0].to_h, records[-1].to_h])
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      assert {description.start_with?("1. Title: Hepatitis Domain")}
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-09-09 00:00:00.000000000 Z
12
+ date: 2020-02-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: csv
@@ -112,7 +112,7 @@ dependencies:
112
112
  description: 'You can use datasets easily because you can access each dataset with
113
113
  multiple ways such as `#each` and Apache Arrow Record Batch.
114
114
 
115
- '
115
+ '
116
116
  email:
117
117
  - tomisuker16@gmail.com
118
118
  - kou@clear-code.com
@@ -133,6 +133,7 @@ files:
133
133
  - lib/datasets/dictionary.rb
134
134
  - lib/datasets/downloader.rb
135
135
  - lib/datasets/fashion-mnist.rb
136
+ - lib/datasets/hepatitis.rb
136
137
  - lib/datasets/iris.rb
137
138
  - lib/datasets/libsvm-dataset-list.rb
138
139
  - lib/datasets/libsvm.rb
@@ -152,6 +153,7 @@ files:
152
153
  - test/test-cifar.rb
153
154
  - test/test-dictionary.rb
154
155
  - test/test-fashion-mnist.rb
156
+ - test/test-hepatitis.rb
155
157
  - test/test-iris.rb
156
158
  - test/test-libsvm-dataset-list.rb
157
159
  - test/test-libsvm.rb
@@ -187,19 +189,20 @@ signing_key:
187
189
  specification_version: 4
188
190
  summary: Red Datasets provides classes that provide common datasets such as iris dataset.
189
191
  test_files:
190
- - test/test-adult.rb
191
- - test/test-libsvm.rb
192
+ - test/test-wine.rb
193
+ - test/test-iris.rb
192
194
  - test/test-wikipedia.rb
193
- - test/test-libsvm-dataset-list.rb
195
+ - test/test-mnist.rb
194
196
  - test/helper.rb
195
- - test/test-iris.rb
196
- - test/test-table.rb
197
- - test/run-test.rb
198
- - test/test-wine.rb
199
197
  - test/test-penn-treebank.rb
200
- - test/test-postal-code-japan.rb
198
+ - test/run-test.rb
199
+ - test/test-table.rb
200
+ - test/test-fashion-mnist.rb
201
201
  - test/test-cifar.rb
202
- - test/test-mnist.rb
203
- - test/test-mushroom.rb
204
202
  - test/test-dictionary.rb
205
- - test/test-fashion-mnist.rb
203
+ - test/test-mushroom.rb
204
+ - test/test-libsvm-dataset-list.rb
205
+ - test/test-hepatitis.rb
206
+ - test/test-adult.rb
207
+ - test/test-postal-code-japan.rb
208
+ - test/test-libsvm.rb