red-datasets 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +6 -0
- data/Rakefile +1 -1
- data/doc/text/news.md +31 -0
- data/lib/datasets/downloader.rb +18 -5
- data/lib/datasets/version.rb +1 -1
- data/test/test-aozora-bunko.rb +2 -0
- data/test/test-cifar.rb +2 -0
- data/test/test-cldr-plurals.rb +1 -1
- data/test/test-dataset.rb +2 -0
- data/test/test-downloader.rb +2 -0
- data/test/test-house-of-councillor.rb +45 -45
- data/test/test-rdataset.rb +1 -1
- metadata +5 -5
- /data/test/{run-test.rb → run.rb} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b902b34e223bc1fe0d52138006497a9eb8052829cfef85f75a62d2ef7d984165
|
4
|
+
data.tar.gz: 4fd686319f25079ca48cb78a9cfbba6b1846532bf7b3d54919379d91142f17fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9cfa887b557b22c5371e8785a487853ad8394f44c82d7e303bce8cbdb604b56915ea215c765c74c3ca8902e582c94c5c16bb1ebd71ca4ca69e284b293106127e
|
7
|
+
data.tar.gz: 2243686a23b77a2552c6bf80fe2dfcefba2232cdd83b36a717442e226c8b867c089e315f01eff7b592ea4b0d49354beddaadbd82a0bbb5cbb37dd9ecdbf50a1d
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
data/doc/text/news.md
CHANGED
@@ -1,5 +1,36 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.2.1 - 2025-10-14
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* `Datasets::Downloader`: Ensured that the system certificates are used
|
8
|
+
for HTTPS downloads.
|
9
|
+
|
10
|
+
* [GH-248](https://github.com/red-data-tools/red-datasets/issues/248)
|
11
|
+
|
12
|
+
### Fixes
|
13
|
+
|
14
|
+
* `Datasets::Downloader`: Avoided duplicate downloads.
|
15
|
+
|
16
|
+
* [GH-242](https://github.com/red-data-tools/red-datasets/issues/242)
|
17
|
+
|
18
|
+
* [GH-243](https://github.com/red-data-tools/red-datasets/issues/243)
|
19
|
+
|
20
|
+
* Patch by Tsutomu Katsube
|
21
|
+
|
22
|
+
* `Datasets::Downloader`: Fixed lock validation logic.
|
23
|
+
|
24
|
+
* [GH-246](https://github.com/red-data-tools/red-datasets/issues/246)
|
25
|
+
|
26
|
+
* Patch by kojix2
|
27
|
+
|
28
|
+
### Thanks
|
29
|
+
|
30
|
+
* Tsutomu Katsube
|
31
|
+
|
32
|
+
* kojix2
|
33
|
+
|
3
34
|
## 0.2.0 - 2025-04-13
|
4
35
|
|
5
36
|
### Fixes
|
data/lib/datasets/downloader.rb
CHANGED
@@ -20,13 +20,12 @@ module Datasets
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def download(output_path, &block)
|
23
|
-
if output_path
|
24
|
-
yield_chunks(output_path, &block) if block_given?
|
25
|
-
return
|
26
|
-
end
|
23
|
+
return if use_cache(output_path, &block)
|
27
24
|
|
28
25
|
partial_output_path = Pathname.new("#{output_path}.partial")
|
29
26
|
synchronize(output_path, partial_output_path) do
|
27
|
+
return if use_cache(output_path, &block)
|
28
|
+
|
30
29
|
output_path.parent.mkpath
|
31
30
|
|
32
31
|
n_retries = 0
|
@@ -94,6 +93,15 @@ module Datasets
|
|
94
93
|
url
|
95
94
|
end
|
96
95
|
|
96
|
+
private def use_cache(output_path, &block)
|
97
|
+
if output_path.exist?
|
98
|
+
yield_chunks(output_path, &block) if block_given?
|
99
|
+
true
|
100
|
+
else
|
101
|
+
false
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
97
105
|
private def synchronize(output_path, partial_output_path)
|
98
106
|
begin
|
99
107
|
Process.getpgid(Process.pid)
|
@@ -114,7 +122,7 @@ module Datasets
|
|
114
122
|
# The process that acquired the lock will be exited before
|
115
123
|
# it stores its process ID.
|
116
124
|
elapsed_time = Time.now - lock_path.mtime
|
117
|
-
valid_lock_path = (elapsed_time
|
125
|
+
valid_lock_path = (elapsed_time < 10)
|
118
126
|
else
|
119
127
|
begin
|
120
128
|
Process.getpgid(pid)
|
@@ -150,6 +158,11 @@ module Datasets
|
|
150
158
|
http = Net::HTTP.new(url.hostname, url.port)
|
151
159
|
# http.set_debug_output($stderr)
|
152
160
|
http.use_ssl = (url.scheme == "https")
|
161
|
+
if http.use_ssl?
|
162
|
+
store = OpenSSL::X509::Store.new
|
163
|
+
store.set_default_paths
|
164
|
+
http.cert_store = store
|
165
|
+
end
|
153
166
|
http.start do
|
154
167
|
path = url.path
|
155
168
|
path += "?#{url.query}" if url.query
|
data/lib/datasets/version.rb
CHANGED
data/test/test-aozora-bunko.rb
CHANGED
data/test/test-cifar.rb
CHANGED
data/test/test-cldr-plurals.rb
CHANGED
data/test/test-dataset.rb
CHANGED
data/test/test-downloader.rb
CHANGED
@@ -71,47 +71,47 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
|
|
71
71
|
test("#each") do
|
72
72
|
records = @dataset.each.to_a
|
73
73
|
assert_equal([
|
74
|
-
|
75
|
-
record(Date.parse("2025-01
|
74
|
+
11,
|
75
|
+
record(Date.parse("2025-08-01"),
|
76
76
|
"自由民主党",
|
77
77
|
"自民",
|
78
|
-
Date.parse("2025-
|
79
|
-
|
80
|
-
22,
|
81
|
-
Date.parse("2025-07-28"),
|
78
|
+
Date.parse("2025-08-11"),
|
79
|
+
100,
|
82
80
|
19,
|
83
|
-
5,
|
84
|
-
33,
|
85
|
-
5,
|
86
|
-
52,
|
87
|
-
10,
|
88
81
|
Date.parse("2028-07-25"),
|
89
82
|
18,
|
90
83
|
5,
|
91
84
|
43,
|
92
85
|
7,
|
93
86
|
61,
|
94
|
-
12
|
95
|
-
|
87
|
+
12,
|
88
|
+
Date.parse("2031-07-28"),
|
89
|
+
12,
|
90
|
+
3,
|
91
|
+
27,
|
92
|
+
4,
|
93
|
+
39,
|
94
|
+
7),
|
95
|
+
record(Date.parse("2025-08-01"),
|
96
96
|
"各派に属しない議員",
|
97
97
|
"無所属",
|
98
|
-
Date.parse("2025-
|
98
|
+
Date.parse("2025-08-11"),
|
99
99
|
9,
|
100
100
|
4,
|
101
|
-
Date.parse("
|
101
|
+
Date.parse("2028-07-25"),
|
102
102
|
1,
|
103
103
|
0,
|
104
|
-
5,
|
105
|
-
3,
|
106
|
-
6,
|
107
104
|
3,
|
108
|
-
Date.parse("2028-07-25"),
|
109
105
|
1,
|
110
|
-
|
111
|
-
2,
|
106
|
+
4,
|
112
107
|
1,
|
108
|
+
Date.parse("2031-07-28"),
|
109
|
+
1,
|
110
|
+
0,
|
111
|
+
4,
|
113
112
|
3,
|
114
|
-
|
113
|
+
5,
|
114
|
+
3),
|
115
115
|
],
|
116
116
|
[
|
117
117
|
records.size,
|
@@ -133,20 +133,20 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
|
|
133
133
|
test("#each") do
|
134
134
|
records = @dataset.each.to_a
|
135
135
|
assert_equal([
|
136
|
-
|
137
|
-
record("
|
136
|
+
248,
|
137
|
+
record("青木 愛",
|
138
138
|
nil,
|
139
|
-
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/profile/
|
140
|
-
"
|
141
|
-
"
|
139
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/profile/7007006.htm",
|
140
|
+
"あおき あい",
|
141
|
+
"立憲",
|
142
142
|
"比例",
|
143
143
|
Date.parse("2028-07-25"),
|
144
|
-
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/
|
145
|
-
[
|
144
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/g7007006.jpg",
|
145
|
+
[2007, 2016, 2022],
|
146
146
|
3,
|
147
|
-
"
|
148
|
-
Date.parse("2025-
|
149
|
-
"昭和
|
147
|
+
"環境委員会、決算委員会(理)",
|
148
|
+
Date.parse("2025-08-11"),
|
149
|
+
"昭和40年8月18日東京都墨田区生まれ。千葉大学教育学部卒業、千葉大学大学院教育学研究科修了、東京芸術大学音楽研究科研究生。社会福祉法人櫻の会理事、ゆうひが丘保育園保育士。平成15年11月衆議院議員選挙初当選。平成19年7月参議院議員選挙初当選。衆議院厚労理事、文科委員ほか。衆議院消費者問題特別委員長。民主党千葉12区総支部長、同参議院比例区総支部長。同東京12区総支部長、同副幹事長。国民の生活が第一、日本未来の党、生活の党、自由党、国民民主党、立憲民主党。その間、国土交通委員会筆頭理事、環境委員会筆頭理事。東日本大震災復興特別委員長ほか○現在立憲民主党、行政監視委員長○衆議院議員3期",
|
150
150
|
Date.parse("2022-11-30")),
|
151
151
|
record("渡辺 猛之",
|
152
152
|
nil,
|
@@ -158,8 +158,8 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
|
|
158
158
|
"https://www.sangiin.go.jp/japanese/joho1/kousei/giin/photo/g7010055.jpg",
|
159
159
|
[2010, 2016, 2022],
|
160
160
|
3,
|
161
|
-
"
|
162
|
-
Date.parse("2025-
|
161
|
+
"法務委員会(理)、議院運営委員会、政治倫理審査会(幹)",
|
162
|
+
Date.parse("2025-08-11"),
|
163
163
|
"昭和43年4月18日生、岐阜県加茂郡八百津町出身。岐阜県立加茂高等学校、名古屋大学経済学部卒業。平成4年、財団法人松下政経塾入塾(第13期生)。平成7年、同塾卒業後、26歳で岐阜県議会議員に初当選。以後通算4期当選。在任中は、自民党岐阜県連副幹事長、岐阜県商工会青年部連合会会長、岐阜県商工政治連盟会長、県監査委員、県政自民クラブ幹事長を歴任。平成22年7月、参議院議員初当選○農林水産委員長、政治倫理の確立及び選挙制度に関する特別委員長、参議院自民党筆頭副幹事長、国土交通副大臣兼内閣府副大臣兼復興副大臣を歴任○現在議院運営委員会筆頭理事。環境委員",
|
164
164
|
Date.parse("2022-11-30")),
|
165
165
|
],
|
@@ -183,7 +183,7 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
|
|
183
183
|
test("#each") do
|
184
184
|
records = @dataset.each.to_a
|
185
185
|
assert_equal([
|
186
|
-
|
186
|
+
8218,
|
187
187
|
record(1,
|
188
188
|
1,
|
189
189
|
"食生活安定に関する質問主意書",
|
@@ -198,20 +198,20 @@ class HouseOfCouncillorTest < Test::Unit::TestCase
|
|
198
198
|
Date.parse("1947-06-23"),
|
199
199
|
Date.parse("1947-06-28"),
|
200
200
|
nil),
|
201
|
-
record(
|
202
|
-
|
203
|
-
"
|
204
|
-
"
|
201
|
+
record(218,
|
202
|
+
24,
|
203
|
+
"オスプレイの安全性並びにオスプレイを含めた防衛装備品の調達及びプロジェクト管理に関する質問主意書",
|
204
|
+
"青木 愛",
|
205
205
|
1,
|
206
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/218/syuh/s218024.htm",
|
206
207
|
nil,
|
208
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/218/syup/s218024.pdf",
|
207
209
|
nil,
|
210
|
+
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/218/meisai/m218024.htm",
|
211
|
+
Date.parse("2025-08-05"),
|
212
|
+
Date.parse("2025-08-05"),
|
208
213
|
nil,
|
209
|
-
|
210
|
-
"https://www.sangiin.go.jp/japanese/joho1/kousei/syuisyo/217/meisai/m217095.htm",
|
211
|
-
Date.parse("2025-04-11"),
|
212
|
-
nil,
|
213
|
-
nil,
|
214
|
-
nil),
|
214
|
+
"8月8日内閣から通知書受領(8月15日まで答弁延期)"),
|
215
215
|
],
|
216
216
|
[
|
217
217
|
records.size,
|
data/test/test-rdataset.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-datasets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomisuker
|
8
8
|
- Kouhei Sutou
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csv
|
@@ -192,7 +192,7 @@ files:
|
|
192
192
|
- red-datasets.gemspec
|
193
193
|
- test/helper.rb
|
194
194
|
- test/japanese-date-parser-test.rb
|
195
|
-
- test/run
|
195
|
+
- test/run.rb
|
196
196
|
- test/test-adult.rb
|
197
197
|
- test/test-afinn.rb
|
198
198
|
- test/test-aozora-bunko.rb
|
@@ -252,13 +252,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
252
252
|
- !ruby/object:Gem::Version
|
253
253
|
version: '0'
|
254
254
|
requirements: []
|
255
|
-
rubygems_version: 3.6.
|
255
|
+
rubygems_version: 3.6.9
|
256
256
|
specification_version: 4
|
257
257
|
summary: Red Datasets provides classes that provide common datasets such as iris dataset.
|
258
258
|
test_files:
|
259
259
|
- test/helper.rb
|
260
260
|
- test/japanese-date-parser-test.rb
|
261
|
-
- test/run
|
261
|
+
- test/run.rb
|
262
262
|
- test/test-adult.rb
|
263
263
|
- test/test-afinn.rb
|
264
264
|
- test/test-aozora-bunko.rb
|
File without changes
|