red-datasets 0.1.9 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01ddaa57da3c64de47cfd9eb2ca9ae2ec3cbcb5d35138fdd74f74009f062358f
4
- data.tar.gz: 431dba2c0e41bc25a4e2716ed20936ee2022c2b04c49683bb7d0d2e2aaa2f99e
3
+ metadata.gz: b902b34e223bc1fe0d52138006497a9eb8052829cfef85f75a62d2ef7d984165
4
+ data.tar.gz: 4fd686319f25079ca48cb78a9cfbba6b1846532bf7b3d54919379d91142f17fd
5
5
  SHA512:
6
- metadata.gz: ab12e9783e4a23b81f9bd1be22c31704f9095026cb27185a6fe985e106320982fb999e1cf2348f6b18b509a7f1a6a5b58d405ece5d541e8ddbe43cd08f252a80
7
- data.tar.gz: 157df5fffd3ba8fd021cdef3933c0a9e99a0fa7f173771e2177d1a86e79788c3250b12453f06084e24afcf624ec3283e702e6bd7595f08e2cf2b5a7dd404065c
6
+ metadata.gz: 9cfa887b557b22c5371e8785a487853ad8394f44c82d7e303bce8cbdb604b56915ea215c765c74c3ca8902e582c94c5c16bb1ebd71ca4ca69e284b293106127e
7
+ data.tar.gz: 2243686a23b77a2552c6bf80fe2dfcefba2232cdd83b36a717442e226c8b867c089e315f01eff7b592ea4b0d49354beddaadbd82a0bbb5cbb37dd9ecdbf50a1d
data/Gemfile CHANGED
@@ -3,3 +3,9 @@
3
3
  source "https://rubygems.org/"
4
4
 
5
5
  gemspec
6
+
7
+ # add steep and typeprof to development dependencies
8
+ group :development do
9
+ gem "steep", require: false
10
+ gem "typeprof"
11
+ end
data/Rakefile CHANGED
@@ -27,7 +27,7 @@ task default: :test
27
27
 
28
28
  desc "Run tests"
29
29
  task :test do
30
- ruby("test/run-test.rb")
30
+ ruby("test/run.rb")
31
31
  end
32
32
 
33
33
  desc "Generate an artifact for GitHub Pages"
data/doc/text/news.md CHANGED
@@ -1,5 +1,42 @@
1
1
  # News
2
2
 
3
+ ## 0.2.1 - 2025-10-14
4
+
5
+ ### Improvements
6
+
7
+ * `Datasets::Downloader`: Ensured using the system certifications on
8
+ HTTPS download.
9
+
10
+ * [GH-248](https://github.com/red-data-tools/red-datasets/issues/248)
11
+
12
+ ### Fixes
13
+
14
+ * `Datasets::Downloader`: Avoided duplicate downloads.
15
+
16
+ * [GH-242](https://github.com/red-data-tools/red-datasets/issues/242)
17
+
18
+ * [GH-243](https://github.com/red-data-tools/red-datasets/issues/243)
19
+
20
+ * Patch by Tsutomu Katsube
21
+
22
+ * `Datasets::Downloader`: Fixed lock validation logic.
23
+
24
+ * [GH-246](https://github.com/red-data-tools/red-datasets/issues/246)
25
+
26
+ * Patch by kojix2
27
+
28
+ ### Thanks
29
+
30
+ * Tsutomu Katsube
31
+
32
+ * kojix2
33
+
34
+ ## 0.2.0 - 2025-04-13
35
+
36
+ ### Fixes
37
+
38
+ * `Datasets::MNIST`: Fixed a bug that dataset can't be downloaded.
39
+
3
40
  ## 0.1.9 - 2025-04-08
4
41
 
5
42
  ### Improvements
@@ -20,13 +20,12 @@ module Datasets
20
20
  end
21
21
 
22
22
  def download(output_path, &block)
23
- if output_path.exist?
24
- yield_chunks(output_path, &block) if block_given?
25
- return
26
- end
23
+ return if use_cache(output_path, &block)
27
24
 
28
25
  partial_output_path = Pathname.new("#{output_path}.partial")
29
26
  synchronize(output_path, partial_output_path) do
27
+ return if use_cache(output_path, &block)
28
+
30
29
  output_path.parent.mkpath
31
30
 
32
31
  n_retries = 0
@@ -94,6 +93,15 @@ module Datasets
94
93
  url
95
94
  end
96
95
 
96
+ private def use_cache(output_path, &block)
97
+ if output_path.exist?
98
+ yield_chunks(output_path, &block) if block_given?
99
+ true
100
+ else
101
+ false
102
+ end
103
+ end
104
+
97
105
  private def synchronize(output_path, partial_output_path)
98
106
  begin
99
107
  Process.getpgid(Process.pid)
@@ -114,7 +122,7 @@ module Datasets
114
122
  # The process that acquired the lock will be exited before
115
123
  # it stores its process ID.
116
124
  elapsed_time = Time.now - lock_path.mtime
117
- valid_lock_path = (elapsed_time > 10)
125
+ valid_lock_path = (elapsed_time < 10)
118
126
  else
119
127
  begin
120
128
  Process.getpgid(pid)
@@ -150,6 +158,11 @@ module Datasets
150
158
  http = Net::HTTP.new(url.hostname, url.port)
151
159
  # http.set_debug_output($stderr)
152
160
  http.use_ssl = (url.scheme == "https")
161
+ if http.use_ssl?
162
+ store = OpenSSL::X509::Store.new
163
+ store.set_default_paths
164
+ http.cert_store = store
165
+ end
153
166
  http.start do
154
167
  path = url.path
155
168
  path += "?#{url.query}" if url.query
@@ -168,6 +181,12 @@ module Datasets
168
181
  else
169
182
  request = Net::HTTP::Get.new(path, headers)
170
183
  end
184
+ if url.scheme == "https" and url.host == "api.github.com"
185
+ gh_token = ENV["GH_TOKEN"]
186
+ if gh_token
187
+ headers = headers.merge("Authorization" => "Bearer #{gh_token}")
188
+ end
189
+ end
171
190
  http.request(request) do |response|
172
191
  case response
173
192
  when Net::HTTPSuccess, Net::HTTPPartialContent
@@ -177,7 +196,8 @@ module Datasets
177
196
  $stderr.puts "Redirect to #{url}"
178
197
  return start_http(url, fallback_urls, headers, limit - 1, &block)
179
198
  else
180
- if response.is_a?(Net::HTTPForbidden)
199
+ case response
200
+ when Net::HTTPForbidden, Net::HTTPNotFound
181
201
  next_url, *rest_fallback_urls = fallback_urls
182
202
  if next_url
183
203
  message = "#{response.code}: #{response.message}: " +
@@ -54,7 +54,6 @@ module Datasets
54
54
  private
55
55
  def base_urls
56
56
  [
57
- "http://yann.lecun.com/exdb/mnist/",
58
57
  "https://ossci-datasets.s3.amazonaws.com/mnist/",
59
58
  ]
60
59
  end
@@ -1,3 +1,3 @@
1
1
  module Datasets
2
- VERSION = "0.1.9"
2
+ VERSION = "0.2.1"
3
3
  end
@@ -1,3 +1,5 @@
1
+ require_relative "helper"
2
+
1
3
  class AozoraBunkoTest < Test::Unit::TestCase
2
4
  include Helper::PathRestorable
3
5
 
data/test/test-cifar.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require_relative "helper"
2
+
1
3
  class CIFARTest < Test::Unit::TestCase
2
4
  include Helper::Sandbox
3
5
 
@@ -14,7 +14,7 @@ class CLDRPluralsTest < Test::Unit::TestCase
14
14
  test("#each") do
15
15
  locales = @dataset.each.to_a
16
16
  assert_equal([
17
- 220,
17
+ 227,
18
18
  locale("bm",
19
19
  [
20
20
  rule("other",
data/test/test-dataset.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require_relative "helper"
2
+
1
3
  class TestDataset < Test::Unit::TestCase
2
4
  sub_test_case("#clear_cache!") do
3
5
  include Helper::PathRestorable
@@ -1,3 +1,5 @@
1
+ require_relative "helper"
2
+
1
3
  class DownloaderTest < Test::Unit::TestCase
2
4
  include Helper::Sandbox
3
5
 
@@ -71,47 +71,47 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
71
71
  test("#each") do
72
72
  records = @dataset.each.to_a
73
73
  assert_equal([
74
- 10,
75
- record(Date.parse("2024-01-26"),
74
+ 11,
75
+ record(Date.parse("2025-08-01"),
76
76
  "自由民主党",
77
77
  "自民",
78
- Date.parse("2024-05-25"),
79
- 115,
80
- 24,
81
- Date.parse("2025-07-28"),
78
+ Date.parse("2025-08-11"),
79
+ 100,
82
80
  19,
83
- 5,
84
- 33,
85
- 6,
86
- 52,
87
- 11,
88
81
  Date.parse("2028-07-25"),
89
82
  18,
90
83
  5,
91
- 45,
92
- 8,
93
- 63,
94
- 13),
95
- record(Date.parse("2024-01-26"),
84
+ 43,
85
+ 7,
86
+ 61,
87
+ 12,
88
+ Date.parse("2031-07-28"),
89
+ 12,
90
+ 3,
91
+ 27,
92
+ 4,
93
+ 39,
94
+ 7),
95
+ record(Date.parse("2025-08-01"),
96
96
  "各派に属しない議員",
97
97
  "無所属",
98
- Date.parse("2024-05-25"),
99
- 12,
98
+ Date.parse("2025-08-11"),
99
+ 9,
100
100
  4,
101
- Date.parse("2025-07-28"),
102
- 1,
103
- 0,
104
- 7,
105
- 2,
106
- 8,
107
- 2,
108
101
  Date.parse("2028-07-25"),
109
102
  1,
110
103
  0,
111
104
  3,
112
- 2,
105
+ 1,
113
106
  4,
114
- 2),
107
+ 1,
108
+ Date.parse("2031-07-28"),
109
+ 1,
110
+ 0,
111
+ 4,
112
+ 3,
113
+ 5,
114
+ 3),
115
115
  ],
116
116
  [
117
117
  records.size,
@@ -133,20 +133,20 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
133
133
  test("#each") do
134
134
  records = @dataset.each.to_a
135
135
  assert_equal([
136
- 247,
137
- record("足立 敏之",
136
+ 248,
137
+ record("青木 愛",
138
138
  nil,
139
- "https://www.sangiin.go.jp/japanese/joho1/kousei/giin/profile/7016001.htm",
140
- "あだち としゆき",
141
- "自民",
139
+ "https://www.sangiin.go.jp/japanese/joho1/kousei/giin/profile/7007006.htm",
140
+ "あおき あい",
141
+ "立憲",
142
142
  "比例",
143
143
  Date.parse("2028-07-25"),
144
- "https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/g7016001.jpg",
145
- [2016, 2022],
146
- 2,
147
- "財政金融委員会(長)",
148
- Date.parse("2024-05-25"),
149
- "昭和29年5月20日兵庫県西宮市生まれ。(本籍地・京都府福知山市)昭和48年和歌山県立桐蔭高等学校卒業、昭和52年京都大学工学部土木工学科卒業、昭和54年京都大学大学院工学研究科修士課程修了、同年建設省入省後、兵庫県庁、東北及び関東地方整備局、河川局河川計画課河川事業調整官、内閣官房(安全保障・危機管理担当)等を経て、平成15年近畿地方整備局企画部長、平成18年河川局河川計画課長、平成21年四国地方整備局長、平成23年中部地方整備局長、平成24年水管理・国土保全局長、平成25年技監、平成26年国土交通省を退職。平成28年第24回参議院議員通常選挙で初当選○参議院予算委員会理事、災害対策特別委員会理事○著書「激甚化する水害」「いいね!建設産業本当の魅力」(日経BP社)",
144
+ "https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/g7007006.jpg",
145
+ [2007, 2016, 2022],
146
+ 3,
147
+ "環境委員会、決算委員会(理)",
148
+ Date.parse("2025-08-11"),
149
+ "昭和40年8月18日東京都墨田区生まれ。千葉大学教育学部卒業、千葉大学大学院教育学研究科修了、東京芸術大学音楽研究科研究生。社会福祉法人櫻の会理事、ゆうひが丘保育園保育士。平成15年11月衆議院議員選挙初当選。平成19年7月参議院議員選挙初当選。衆議院厚労理事、文科委員ほか。衆議院消費者問題特別委員長。民主党千葉12区総支部長、同参議院比例区総支部長。同東京12区総支部長、同副幹事長。国民の生活が第一、日本未来の党、生活の党、自由党、国民民主党、立憲民主党。その間、国土交通委員会筆頭理事、環境委員会筆頭理事。東日本大震災復興特別委員長ほか○現在立憲民主党、行政監視委員長○衆議院議員3期",
150
150
  Date.parse("2022-11-30")),
151
151
  record("渡辺 猛之",
152
152
  nil,
@@ -158,8 +158,8 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
158
158
  "https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/g7010055.jpg",
159
159
  [2010, 2016, 2022],
160
160
  3,
161
- "経済産業委員会、議院運営委員会(理)",
162
- Date.parse("2024-05-25"),
161
+ "法務委員会(理)、議院運営委員会、政治倫理審査会(幹)",
162
+ Date.parse("2025-08-11"),
163
163
  "昭和43年4月18日生、岐阜県加茂郡八百津町出身。岐阜県立加茂高等学校、名古屋大学経済学部卒業。平成4年、財団法人松下政経塾入塾(第13期生)。平成7年、同塾卒業後、26歳で岐阜県議会議員に初当選。以後通算4期当選。在任中は、自民党岐阜県連副幹事長、岐阜県商工会青年部連合会会長、岐阜県商工政治連盟会長、県監査委員、県政自民クラブ幹事長を歴任。平成22年7月、参議院議員初当選○農林水産委員長、政治倫理の確立及び選挙制度に関する特別委員長、参議院自民党筆頭副幹事長、国土交通副大臣兼内閣府副大臣兼復興副大臣を歴任○現在議院運営委員会筆頭理事。環境委員",
164
164
  Date.parse("2022-11-30")),
165
165
  ],
@@ -183,7 +183,7 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
183
183
  test("#each") do
184
184
  records = @dataset.each.to_a
185
185
  assert_equal([
186
- 7739,
186
+ 8218,
187
187
  record(1,
188
188
  1,
189
189
  "食生活安定に関する質問主意書",
@@ -198,20 +198,20 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
198
198
  Date.parse("1947-06-23"),
199
199
  Date.parse("1947-06-28"),
200
200
  nil),
201
- record(213,
202
- 145,
203
- "地方自治体職員の国籍に関する質問主意書",
204
- "神谷 宗幣",
201
+ record(218,
202
+ 24,
203
+ "オスプレイの安全性並びにオスプレイを含めた防衛装備品の調達及びプロジェクト管理に関する質問主意書",
204
+ "青木 愛",
205
205
  1,
206
+ "https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/218/syuh/s218024.htm",
206
207
  nil,
208
+ "https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/218/syup/s218024.pdf",
207
209
  nil,
210
+ "https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/218/meisai/m218024.htm",
211
+ Date.parse("2025-08-05"),
212
+ Date.parse("2025-08-05"),
208
213
  nil,
209
- nil,
210
- "https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/213/meisai/m213145.htm",
211
- Date.parse("2024-05-23"),
212
- nil,
213
- nil,
214
- nil),
214
+ "8月8日内閣から通知書受領(8月15日まで答弁延期)"),
215
215
  ],
216
216
  [
217
217
  records.size,
@@ -48,7 +48,7 @@ class RdatasetTest < Test::Unit::TestCase
48
48
  test("without package_name") do
49
49
  records = @dataset.each.to_a
50
50
  assert_equal([
51
- 2293,
51
+ 3485,
52
52
  {
53
53
  package: "AER",
54
54
  dataset: "Affairs",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
8
8
  - Kouhei Sutou
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-04-08 00:00:00.000000000 Z
11
+ date: 1980-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: csv
@@ -192,7 +192,7 @@ files:
192
192
  - red-datasets.gemspec
193
193
  - test/helper.rb
194
194
  - test/japanese-date-parser-test.rb
195
- - test/run-test.rb
195
+ - test/run.rb
196
196
  - test/test-adult.rb
197
197
  - test/test-afinn.rb
198
198
  - test/test-aozora-bunko.rb
@@ -252,13 +252,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
252
252
  - !ruby/object:Gem::Version
253
253
  version: '0'
254
254
  requirements: []
255
- rubygems_version: 3.6.2
255
+ rubygems_version: 3.6.9
256
256
  specification_version: 4
257
257
  summary: Red Datasets provides classes that provide common datasets such as iris dataset.
258
258
  test_files:
259
259
  - test/helper.rb
260
260
  - test/japanese-date-parser-test.rb
261
- - test/run-test.rb
261
+ - test/run.rb
262
262
  - test/test-adult.rb
263
263
  - test/test-afinn.rb
264
264
  - test/test-aozora-bunko.rb
File without changes