red-datasets 0.1.8 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/text/news.md +13 -0
- data/lib/datasets/dataset.rb +2 -2
- data/lib/datasets/downloader.rb +26 -3
- data/lib/datasets/mnist.rb +0 -1
- data/lib/datasets/penguins.rb +28 -5
- data/lib/datasets/version.rb +1 -1
- data/test/test-house-of-councillor.rb +44 -44
- data/test/test-rdataset.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f550440e4e909e368257c125ad39507aca47ad7c232b8134c984eea05de1a49e
|
4
|
+
data.tar.gz: 0a5617cf2f1de8953d4aa79649df6389e33c77115e52ce0b27798b1e0b9fa049
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ce66a006fac20552681255204a5ea3d08a6cb37e07d648083d757e5f77e8c20d789b4117882b282846c28321579629848e5eb0b0b9d46fca129500d1ac6ece1
|
7
|
+
data.tar.gz: 70b72119a5c22309c90059057d195d4a036b1b4b13c7c8cc0cb82a26580dd16c3402d1d8c40a27bbdc0aa89c3713eafc8e7dc35fa644c2bec439321caf365db3
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,18 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.2.0 - 2025-04-13
|
4
|
+
|
5
|
+
### Fixes
|
6
|
+
|
7
|
+
* `Datasets::MNIST`: Fixed a bug where the dataset couldn't be downloaded.
|
8
|
+
|
9
|
+
## 0.1.9 - 2025-04-08
|
10
|
+
|
11
|
+
### Improvements
|
12
|
+
|
13
|
+
* `Datasets::Penguins`: Changed to use `POST` for downloading data
|
14
|
+
from EDI.
|
15
|
+
|
3
16
|
## 0.1.8 - 2025-02-07
|
4
17
|
|
5
18
|
### Improvements
|
data/lib/datasets/dataset.rb
CHANGED
@@ -33,8 +33,8 @@ module Datasets
|
|
33
33
|
@cache_path ||= CachePath.new(@metadata.id)
|
34
34
|
end
|
35
35
|
|
36
|
-
def download(output_path, url, *fallback_urls, &block)
|
37
|
-
downloader = Downloader.new(url, *fallback_urls)
|
36
|
+
def download(output_path, url, *fallback_urls, **options, &block)
|
37
|
+
downloader = Downloader.new(url, *fallback_urls, **options)
|
38
38
|
downloader.download(output_path, &block)
|
39
39
|
end
|
40
40
|
|
data/lib/datasets/downloader.rb
CHANGED
@@ -12,9 +12,11 @@ module Datasets
|
|
12
12
|
class Downloader
|
13
13
|
class TooManyRedirects < Error; end
|
14
14
|
|
15
|
-
def initialize(url, *fallback_urls)
|
15
|
+
def initialize(url, *fallback_urls, http_method: nil, http_parameters: nil)
|
16
16
|
@url = normalize_url(url)
|
17
17
|
@fallback_urls = fallback_urls.collect { |fallback_url| normalize_url(fallback_url) }
|
18
|
+
@http_method = http_method
|
19
|
+
@http_parameters = http_parameters
|
18
20
|
end
|
19
21
|
|
20
22
|
def download(output_path, &block)
|
@@ -151,7 +153,27 @@ module Datasets
|
|
151
153
|
http.start do
|
152
154
|
path = url.path
|
153
155
|
path += "?#{url.query}" if url.query
|
154
|
-
|
156
|
+
if @http_method == :post
|
157
|
+
# TODO: We may want to add @http_content_type, @http_body
|
158
|
+
# and so on.
|
159
|
+
if @http_parameters
|
160
|
+
body = URI.encode_www_form(@http_parameters)
|
161
|
+
content_type = "application/x-www-form-urlencoded"
|
162
|
+
headers = {"Content-Type" => content_type}.merge(headers)
|
163
|
+
else
|
164
|
+
body = ""
|
165
|
+
end
|
166
|
+
request = Net::HTTP::Post.new(path, headers)
|
167
|
+
request.body = body
|
168
|
+
else
|
169
|
+
request = Net::HTTP::Get.new(path, headers)
|
170
|
+
end
|
171
|
+
if url.scheme == "https" and url.host == "api.github.com"
|
172
|
+
gh_token = ENV["GH_TOKEN"]
|
173
|
+
if gh_token
|
174
|
+
headers = headers.merge("Authorization" => "Bearer #{gh_token}")
|
175
|
+
end
|
176
|
+
end
|
155
177
|
http.request(request) do |response|
|
156
178
|
case response
|
157
179
|
when Net::HTTPSuccess, Net::HTTPPartialContent
|
@@ -161,7 +183,8 @@ module Datasets
|
|
161
183
|
$stderr.puts "Redirect to #{url}"
|
162
184
|
return start_http(url, fallback_urls, headers, limit - 1, &block)
|
163
185
|
else
|
164
|
-
|
186
|
+
case response
|
187
|
+
when Net::HTTPForbidden, Net::HTTPNotFound
|
165
188
|
next_url, *rest_fallback_urls = fallback_urls
|
166
189
|
if next_url
|
167
190
|
message = "#{response.code}: #{response.message}: " +
|
data/lib/datasets/mnist.rb
CHANGED
data/lib/datasets/penguins.rb
CHANGED
@@ -26,7 +26,9 @@ module Datasets
|
|
26
26
|
super
|
27
27
|
species = self.class.name.split("::").last.downcase
|
28
28
|
@metadata.id = "palmerpenguins-#{species}"
|
29
|
-
|
29
|
+
package_id = http_parameters["packageid"]
|
30
|
+
@metadata.url = "https://portal.edirepository.org/nis/mapbrowse" +
|
31
|
+
"?packageid=#{package_id}"
|
30
32
|
@metadata.licenses = ["CC0-1.0"]
|
31
33
|
@data_path = cache_dir_path + "#{species}.csv"
|
32
34
|
end
|
@@ -46,7 +48,10 @@ module Datasets
|
|
46
48
|
end
|
47
49
|
|
48
50
|
private def open_data
|
49
|
-
download(data_path,
|
51
|
+
download(data_path,
|
52
|
+
"https://portal.edirepository.org/nis/dataviewer",
|
53
|
+
http_method: :post,
|
54
|
+
http_parameters: http_parameters)
|
50
55
|
CSV.open(data_path, headers: :first_row, converters: :all) do |csv|
|
51
56
|
yield csv
|
52
57
|
end
|
@@ -56,19 +61,37 @@ module Datasets
|
|
56
61
|
# Adelie penguin data from: https://doi.org/10.6073/pasta/abc50eed9138b75f54eaada0841b9b86
|
57
62
|
class Adelie < SpeciesBase
|
58
63
|
DOI = "doi.org/10.6073/pasta/abc50eed9138b75f54eaada0841b9b86".freeze
|
59
|
-
|
64
|
+
|
65
|
+
private def http_parameters
|
66
|
+
{
|
67
|
+
"packageid" => "knb-lter-pal.219.3",
|
68
|
+
"entityid" => "002f3893385f710df69eeebe893144ff",
|
69
|
+
}
|
70
|
+
end
|
60
71
|
end
|
61
72
|
|
62
73
|
# Chinstrap penguin data from: https://doi.org/10.6073/pasta/409c808f8fc9899d02401bdb04580af7
|
63
74
|
class Chinstrap < SpeciesBase
|
64
75
|
DOI = "doi.org/10.6073/pasta/409c808f8fc9899d02401bdb04580af7".freeze
|
65
|
-
|
76
|
+
|
77
|
+
private def http_parameters
|
78
|
+
{
|
79
|
+
"packageid" => "knb-lter-pal.221.2",
|
80
|
+
"entityid" => "fe853aa8f7a59aa84cdd3197619ef462",
|
81
|
+
}
|
82
|
+
end
|
66
83
|
end
|
67
84
|
|
68
85
|
# Gentoo penguin data from: https://doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce
|
69
86
|
class Gentoo < SpeciesBase
|
70
87
|
DOI = "doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce".freeze
|
71
|
-
|
88
|
+
|
89
|
+
private def http_parameters
|
90
|
+
{
|
91
|
+
"packageid" => "knb-lter-pal.220.3",
|
92
|
+
"entityid" => "e03b43c924f226486f2f0ab6709d2381",
|
93
|
+
}
|
94
|
+
end
|
72
95
|
end
|
73
96
|
end
|
74
97
|
|
data/lib/datasets/version.rb
CHANGED
@@ -72,46 +72,46 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
|
|
72
72
|
records = @dataset.each.to_a
|
73
73
|
assert_equal([
|
74
74
|
10,
|
75
|
-
record(Date.parse("
|
75
|
+
record(Date.parse("2025-01-24"),
|
76
76
|
"自由民主党",
|
77
77
|
"自民",
|
78
|
-
Date.parse("
|
79
|
-
|
80
|
-
|
78
|
+
Date.parse("2025-04-11"),
|
79
|
+
113,
|
80
|
+
22,
|
81
81
|
Date.parse("2025-07-28"),
|
82
82
|
19,
|
83
83
|
5,
|
84
84
|
33,
|
85
|
-
|
85
|
+
5,
|
86
86
|
52,
|
87
|
-
|
87
|
+
10,
|
88
88
|
Date.parse("2028-07-25"),
|
89
89
|
18,
|
90
90
|
5,
|
91
|
-
|
91
|
+
43,
|
92
92
|
7,
|
93
|
-
|
93
|
+
61,
|
94
94
|
12),
|
95
|
-
record(Date.parse("
|
95
|
+
record(Date.parse("2025-01-24"),
|
96
96
|
"各派に属しない議員",
|
97
97
|
"無所属",
|
98
|
-
Date.parse("
|
99
|
-
|
98
|
+
Date.parse("2025-04-11"),
|
99
|
+
9,
|
100
100
|
4,
|
101
101
|
Date.parse("2025-07-28"),
|
102
102
|
1,
|
103
103
|
0,
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
104
|
+
5,
|
105
|
+
3,
|
106
|
+
6,
|
107
|
+
3,
|
108
108
|
Date.parse("2028-07-25"),
|
109
109
|
1,
|
110
110
|
0,
|
111
|
-
3,
|
112
111
|
2,
|
113
|
-
|
114
|
-
|
112
|
+
1,
|
113
|
+
3,
|
114
|
+
1),
|
115
115
|
],
|
116
116
|
[
|
117
117
|
records.size,
|
@@ -133,20 +133,20 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
|
|
133
133
|
test("#each") do
|
134
134
|
records = @dataset.each.to_a
|
135
135
|
assert_equal([
|
136
|
-
|
137
|
-
record("
|
136
|
+
240,
|
137
|
+
record("阿達 雅志",
|
138
138
|
nil,
|
139
|
-
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/profile/
|
140
|
-
"あだち
|
139
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/profile/7014002.htm",
|
140
|
+
"あだち まさし",
|
141
141
|
"自民",
|
142
142
|
"比例",
|
143
143
|
Date.parse("2028-07-25"),
|
144
|
-
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/
|
145
|
-
[2016, 2022],
|
146
|
-
|
147
|
-
"
|
148
|
-
Date.parse("
|
149
|
-
"昭和
|
144
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/g7014002.jpg",
|
145
|
+
[2014, 2016, 2022],
|
146
|
+
3,
|
147
|
+
"総務委員会、国家基本政策委員会、災害対策特別委員会",
|
148
|
+
Date.parse("2025-04-11"),
|
149
|
+
"昭和34年9月27日京都市生まれ、福井県、大阪府で育つ。私立洛星中学・高校を経て、昭和58年東京大学法学部卒業。同年住友商事株式会社入社。鉄道車輌の輸出営業、米国車輌工場勤務後、ニューヨーク大学ロー・スクールにて比較法修士(MCJ)、法学修士(LLM)を取得。平成5年米国ニューヨーク州弁護士登録。その後同社法務部、北京駐在勤務後、平成12年退職。衆議院議員佐藤信二氏秘書。平成16年ポール・ワイス外国法事務弁護士事務所勤務。日本大学法科大学院非常勤講師、東京大学大学院情報学環特任研究員を歴任。平成26年12月繰上げ当選。平成28年9月党外交部会長(2期連続)。平成30年10月国土交通大臣政務官兼内閣府大臣政務官、令和2年内閣総理大臣補佐官(経済・外交担当)○著書「世界パラダイム・シフト」「政治家になった父から18歳の息子へ(わが家の主権者教育)」",
|
150
150
|
Date.parse("2022-11-30")),
|
151
151
|
record("渡辺 猛之",
|
152
152
|
nil,
|
@@ -158,8 +158,8 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
|
|
158
158
|
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/g7010055.jpg",
|
159
159
|
[2010, 2016, 2022],
|
160
160
|
3,
|
161
|
-
"
|
162
|
-
Date.parse("
|
161
|
+
"法務委員会(理)、議院運営委員会",
|
162
|
+
Date.parse("2025-04-11"),
|
163
163
|
"昭和43年4月18日生、岐阜県加茂郡八百津町出身。岐阜県立加茂高等学校、名古屋大学経済学部卒業。平成4年、財団法人松下政経塾入塾(第13期生)。平成7年、同塾卒業後、26歳で岐阜県議会議員に初当選。以後通算4期当選。在任中は、自民党岐阜県連副幹事長、岐阜県商工会青年部連合会会長、岐阜県商工政治連盟会長、県監査委員、県政自民クラブ幹事長を歴任。平成22年7月、参議院議員初当選○農林水産委員長、政治倫理の確立及び選挙制度に関する特別委員長、参議院自民党筆頭副幹事長、国土交通副大臣兼内閣府副大臣兼復興副大臣を歴任○現在議院運営委員会筆頭理事。環境委員",
|
164
164
|
Date.parse("2022-11-30")),
|
165
165
|
],
|
@@ -183,7 +183,7 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
|
|
183
183
|
test("#each") do
|
184
184
|
records = @dataset.each.to_a
|
185
185
|
assert_equal([
|
186
|
-
|
186
|
+
8042,
|
187
187
|
record(1,
|
188
188
|
1,
|
189
189
|
"食生活安定に関する質問主意書",
|
@@ -198,20 +198,20 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
|
|
198
198
|
Date.parse("1947-06-23"),
|
199
199
|
Date.parse("1947-06-28"),
|
200
200
|
nil),
|
201
|
-
record(
|
202
|
-
|
203
|
-
"
|
204
|
-
"
|
201
|
+
record(217,
|
202
|
+
95,
|
203
|
+
"石綿健康被害救済法による特別遺族給付金の認定に係る旧国鉄元職員の遺族及びJR元職員の遺族間の権衡に関する再質問主意書",
|
204
|
+
"福島 みずほ",
|
205
205
|
1,
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/
|
211
|
-
Date.parse("
|
212
|
-
|
213
|
-
|
214
|
-
|
206
|
+
nil,
|
207
|
+
nil,
|
208
|
+
nil,
|
209
|
+
nil,
|
210
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/217/meisai/m217095.htm",
|
211
|
+
Date.parse("2025-04-11"),
|
212
|
+
nil,
|
213
|
+
nil,
|
214
|
+
nil),
|
215
215
|
],
|
216
216
|
[
|
217
217
|
records.size,
|
data/test/test-rdataset.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-datasets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomisuker
|
8
8
|
- Kouhei Sutou
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csv
|