red-datasets 0.0.8 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c7a9199546e7a001c97e45c6fa28db15c0d96b748e527d9705dfee4e4b1db6fd
4
- data.tar.gz: c659f6ae1e658ad91210e4427be063463124d89ef90388d34ebfb73ceb49068a
3
+ metadata.gz: 07c55b47d31b30ceaf4cdd3ea22da5c737d81884a494c6a11abc6fda6fbea22b
4
+ data.tar.gz: a28d34b5d28cb57349a81112ffc2db8fe9f94939beb21477af4d9d0c9d5b59ab
5
5
  SHA512:
6
- metadata.gz: d8a23c4a165a596df22ce5bbe1f8f0cd5c0f002deecafbb26cd5e5f75abb3c0224c1013898162a67787159258d1b801395fc4d949c17939d95940664cffd5600
7
- data.tar.gz: f2fd4eb733e6205f138c4005627e815e3787040a8a4b6cce7eca9fd5d4adaa12263e17e8f5bd9394a851e5210f28736ee3c682c81e110da304ae17fb3f0bedba
6
+ metadata.gz: 4871ac4ec167cb78b3fce8f9c5de9f6cff6a4089b8e4fd87fe7bb3265865cfcbd86935e8f2fa0bc5e40fde8a471e1655390fcdf8dcc0a5197342143e0cb855e5
7
+ data.tar.gz: 66d31943cb857632518a90166972bfa9ebe4b8ec355eca8291da40183c260e3c175d5866220efc0e4174f780c8449b45004db425a8fc7453268236f9b7dcbc1d
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Red Datasets
2
2
 
3
+ [![Build Status](https://travis-ci.org/red-data-tools/red-datasets.svg?branch=master)](https://travis-ci.org/red-data-tools/red-datasets)
4
+ [![Gem Version](https://badge.fury.io/rb/red-datasets.svg)](https://badge.fury.io/rb/red-datasets)
5
+
3
6
  ## Description
4
7
 
5
8
  Red Datasets provides classes that give access to common datasets such as the iris dataset.
@@ -128,6 +131,9 @@ mnist.each do |record|
128
131
  end
129
132
  ```
130
133
 
134
+ ## NArray compatibility
135
+
136
+ * [red-datasets-numo-narray](https://github.com/red-data-tools/red-datasets-numo-narray)
131
137
 
132
138
  ## License
133
139
 
data/doc/text/news.md CHANGED
@@ -1,5 +1,98 @@
1
1
  # News
2
2
 
3
+ ## 0.1.3 - 2021-07-09
4
+
5
+ ### Improvements
6
+
7
+ * `Datasets::SeabornData`: Added.
8
+
9
+ * `Datasets::SudachiSynonymDictionary`: Added.
10
+
11
+ ## 0.1.2 - 2021-06-03
12
+
13
+ ### Improvements
14
+
15
+ * `Datasets::Rdatasets` and `Datasets::RdatasetsList`: Added.
16
+
17
+ * `Datasets::Penguins`: Changed for compatibility with seaborn's
18
+ penguins dataset.
19
+
20
+ ## 0.1.1 - 2021-04-11
21
+
22
+ ### Improvements
23
+
24
+ * Added support for Ruby 3.0.
25
+
26
+ * `Datasets::Communities`: Added.
27
+ [GitHub#64][Patch by Yasuo Honda]
28
+
29
+ * `Datasets::EStatJapan`: Added.
30
+ [GitHub#90][Patch by Kunihiko Miyoshi]
31
+
32
+ * `Datasets::Penguins`: Added.
33
+ [GitHub#100][Patch by Kenta Murata]
34
+
35
+ * `Datasets::CLDRPlurals`: Added.
36
+
37
+ ### Thanks
38
+
39
+ * Yasuo Honda
40
+
41
+ * Kunihiko Miyoshi
42
+
43
+ * Kenta Murata
44
+
45
+ ## 0.1.0 - 2020-02-04
46
+
47
+ ### Improvements
48
+
49
+ * Added support for Ruby 2.7.
50
+ [GitHub#82][GitHub#83][Patch by Yasuo Honda]
51
+
52
+ * `Datasets::Hepatitis`: Added.
53
+ [GitHub#70][Patch by KazuhiroYoshimoto]
54
+
55
+ * `Datasets::Downloader`: Added support for query.
56
+
57
+ ### Thanks
58
+
59
+ * Yasuo Honda
60
+
61
+ * KazuhiroYoshimoto
62
+
63
+ ## 0.0.9 - 2019-09-09
64
+
65
+ ### Improvements
66
+
67
+ * `Datasets::LIBSVMDatasetList`: Improved performance.
68
+
69
+ * `Datasets::Mushroom`: Added.
70
+ [GitHub#33][Patch by Yasuo Honda]
71
+
72
+ * `Datasets::Table#n_columns`: Added.
73
+
74
+ * `Datasets::Table#n_rows`: Added.
75
+
76
+ * `Datasets::Table#[]`: Added support for index access.
77
+
78
+ * `Datasets::Table#column_names`: Added.
79
+
80
+ * `Datasets::Table#size`: Added.
81
+
82
+ * `Datasets::Table#length`: Added.
83
+
84
+ * `Datasets::Table#each_column`: Added.
85
+
86
+ * `Datasets::Table#each_record`: Added.
87
+
88
+ * `Datasets::Table#find_record`: Added.
89
+
90
+ ### Thanks
91
+
92
+ * Yasuo Honda
93
+
94
+ ### Improvements
95
+
3
96
  ## 0.0.8 - 2019-03-24
4
97
 
5
98
  ### Improvements
data/lib/datasets.rb CHANGED
@@ -2,12 +2,21 @@ require_relative "datasets/version"
2
2
 
3
3
  require_relative "datasets/adult"
4
4
  require_relative "datasets/cifar"
5
+ require_relative "datasets/cldr-plurals"
6
+ require_relative "datasets/communities"
7
+ require_relative "datasets/e-stat-japan"
5
8
  require_relative "datasets/fashion-mnist"
9
+ require_relative "datasets/hepatitis"
6
10
  require_relative "datasets/iris"
7
11
  require_relative "datasets/libsvm"
8
12
  require_relative "datasets/libsvm-dataset-list"
9
13
  require_relative "datasets/mnist"
14
+ require_relative "datasets/mushroom"
15
+ require_relative "datasets/penguins"
10
16
  require_relative "datasets/penn-treebank"
11
17
  require_relative "datasets/postal-code-japan"
18
+ require_relative "datasets/rdatasets"
19
+ require_relative "datasets/seaborn-data"
20
+ require_relative "datasets/sudachi-synonym-dictionary"
12
21
  require_relative "datasets/wikipedia"
13
22
  require_relative "datasets/wine"
@@ -62,11 +62,12 @@ module Datasets
62
62
  data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.#{ext}"
63
63
  download(data_path, data_url)
64
64
  end
65
- CSV.open(data_path,
66
- {
65
+
66
+ options = {
67
67
  converters: [:numeric, lambda {|f| f.strip}],
68
68
  skip_lines: /\A\|/,
69
- }) do |csv|
69
+ }
70
+ CSV.open(data_path, **options) do |csv|
70
71
  yield(csv)
71
72
  end
72
73
  end
@@ -1,10 +1,10 @@
1
- require "rubygems/package"
2
- require "zlib"
3
-
1
+ require_relative "tar-gz-readable"
4
2
  require_relative "dataset"
5
3
 
6
4
  module Datasets
7
5
  class CIFAR < Dataset
6
+ include TarGzReadable
7
+
8
8
  module Pixelable
9
9
  def pixels
10
10
  data.unpack("C*")
@@ -61,7 +61,7 @@ module Datasets
61
61
  private
62
62
 
63
63
  def parse_data(data_path, &block)
64
- open_tar(data_path) do |tar|
64
+ open_tar_gz(data_path) do |tar|
65
65
  target_file_names.each do |target_file_name|
66
66
  tar.seek(target_file_name) do |entry|
67
67
  parse_entry(entry, &block)
@@ -124,14 +124,6 @@ module Datasets
124
124
  end
125
125
  end
126
126
  end
127
-
128
- def open_tar(data_path)
129
- Zlib::GzipReader.open(data_path) do |f|
130
- Gem::Package::TarReader.new(f) do |tar|
131
- yield(tar)
132
- end
133
- end
134
- end
135
127
  end
136
128
  end
137
129
 
@@ -0,0 +1,385 @@
1
+ require "rexml/streamlistener"
2
+ require "rexml/parsers/baseparser"
3
+ require "rexml/parsers/streamparser"
4
+ require "strscan"
5
+
6
+ require_relative "dataset"
7
+
8
+ module Datasets
9
+ class CLDRPlurals < Dataset
10
+ Locale = Struct.new(:name,
11
+ :rules)
12
+
13
+ Rule = Struct.new(:count,
14
+ :condition,
15
+ :integer_samples,
16
+ :decimal_samples)
17
+
18
+ def initialize
19
+ super()
20
+ @metadata.id = "cldr-plurals"
21
+ @metadata.name = "CLDR language plural rules"
22
+ @metadata.url = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/plurals.xml"
23
+ @metadata.licenses = ["Unicode-DFS-2016"]
24
+ @metadata.description = <<~DESCRIPTION
25
+ Language plural rules in Unicode Common Locale Data Repository.
26
+ See also: https://unicode-org.github.io/cldr-staging/charts/latest/supplemental/language_plural_rules.html
27
+ DESCRIPTION
28
+ end
29
+
30
+ def each(&block)
31
+ return to_enum(__method__) unless block_given?
32
+
33
+ open_data do |input|
34
+ catch do |abort_tag|
35
+ listener = Listener.new(abort_tag, &block)
36
+ parser = REXML::Parsers::StreamParser.new(input, listener)
37
+ parser.parse
38
+ end
39
+ end
40
+ end
41
+
42
+ private
43
+ def open_data
44
+ data_path = cache_dir_path + "plurals.xml"
45
+ unless data_path.exist?
46
+ download(data_path, @metadata.url)
47
+ end
48
+ ::File.open(data_path) do |input|
49
+ yield(input)
50
+ end
51
+ end
52
+
53
+ # Spec: https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
54
+ class Listener
55
+ include REXML::StreamListener
56
+
57
+ def initialize(abort_tag, &block)
58
+ @abort_tag = abort_tag
59
+ @block = block
60
+ @tag_name_stack = []
61
+ end
62
+
63
+ def tag_start(name, attributes)
64
+ @tag_name_stack.push(name)
65
+ case name
66
+ when "pluralRules"
67
+ @locales = attributes["locales"].split
68
+ @rules = []
69
+ when "pluralRule"
70
+ @rule = Rule.new(attributes["count"])
71
+ end
72
+ end
73
+
74
+ def tag_end(name)
75
+ case name
76
+ when "pluralRules"
77
+ @locales.each do |locale_name|
78
+ @block.call(Locale.new(locale_name, @rules))
79
+ end
80
+ when "pluralRule"
81
+ @rules << @rule
82
+ end
83
+ @tag_name_stack.pop
84
+ end
85
+
86
+ def text(data)
87
+ case @tag_name_stack.last
88
+ when "pluralRule"
89
+ parse_plural_rule(data)
90
+ end
91
+ end
92
+
93
+ private
94
+ def parse_plural_rule(data)
95
+ parser = RuleParser.new(@rule, data)
96
+ parser.parse
97
+ end
98
+ end
99
+ private_constant :Listener
100
+
101
+ # Syntax: http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax
102
+ class RuleParser
103
+ def initialize(rule, data)
104
+ @rule = rule
105
+ @data = data
106
+ @scanner = StringScanner.new(@data)
107
+ end
108
+
109
+ def parse
110
+ @rule.condition = parse_condition
111
+ skip_whitespaces
112
+ if @scanner.scan(/@integer/)
113
+ @rule.integer_samples = parse_sample_list
114
+ end
115
+ skip_whitespaces
116
+ if @scanner.scan(/@decimal/)
117
+ @rule.decimal_samples = parse_sample_list
118
+ end
119
+ end
120
+
121
+ private
122
+ def skip_whitespaces
123
+ @scanner.skip(/\p{Pattern_White_Space}+/)
124
+ end
125
+
126
+ def parse_condition
127
+ and_condition = parse_and_condition
128
+ return nil if and_condition.nil?
129
+ and_conditions = [and_condition]
130
+ while parse_or
131
+ and_conditions << parse_and_condition
132
+ end
133
+ if and_conditions.size == 1
134
+ and_condition
135
+ else
136
+ [:or, *and_conditions]
137
+ end
138
+ end
139
+
140
+ def parse_or
141
+ skip_whitespaces
142
+ @scanner.scan(/or/)
143
+ end
144
+
145
+ def parse_and_condition
146
+ skip_whitespaces
147
+ relation = parse_relation
148
+ return nil if relation.nil?
149
+ relations = [relation]
150
+ while parse_and
151
+ relations << parse_relation
152
+ end
153
+ if relations.size == 1
154
+ relation
155
+ else
156
+ [:and, *relations]
157
+ end
158
+ end
159
+
160
+ def parse_and
161
+ skip_whitespaces
162
+ @scanner.scan(/and/)
163
+ end
164
+
165
+ def parse_relation
166
+ parse_is_relation or
167
+ parse_in_relation or
168
+ parse_within_relation
169
+ end
170
+
171
+ def parse_is_relation
172
+ position = @scanner.pos
173
+ skip_whitespaces
174
+ expr = parse_expr
175
+ unless parse_is
176
+ @scanner.pos = position
177
+ return nil
178
+ end
179
+ if parse_not
180
+ operator = :is_not
181
+ else
182
+ operator = :is
183
+ end
184
+ value = parse_value
185
+ if value.nil?
186
+ raise Error, "no value for #{operator}: #{@scanner.inspect}"
187
+ end
188
+ [operator, expr, value]
189
+ end
190
+
191
+ def parse_is
192
+ skip_whitespaces
193
+ @scanner.scan(/is/)
194
+ end
195
+
196
+ def parse_not
197
+ skip_whitespaces
198
+ @scanner.scan(/not/)
199
+ end
200
+
201
+ def parse_in_relation
202
+ position = @scanner.pos
203
+ skip_whitespaces
204
+ expr = parse_expr
205
+ if parse_not
206
+ if parse_in
207
+ operator = :not_in
208
+ else
209
+ @scanner.ops = position
210
+ return nil
211
+ end
212
+ elsif parse_in
213
+ operator = :in
214
+ elsif parse_equal
215
+ operator = :equal
216
+ elsif parse_not_equal
217
+ operator = :not_equal
218
+ else
219
+ @scanner.pos = position
220
+ return nil
221
+ end
222
+ range_list = parse_range_list
223
+ [operator, expr, range_list]
224
+ end
225
+
226
+ def parse_in
227
+ skip_whitespaces
228
+ @scanner.scan(/in/)
229
+ end
230
+
231
+ def parse_equal
232
+ skip_whitespaces
233
+ @scanner.scan(/=/)
234
+ end
235
+
236
+ def parse_not_equal
237
+ skip_whitespaces
238
+ @scanner.scan(/!=/)
239
+ end
240
+
241
+ def parse_within_relation
242
+ position = @scanner.pos
243
+ skip_whitespaces
244
+ expr = parse_expr
245
+ have_not = parse_not
246
+ unless parse_within
247
+ @scanner.pos = position
248
+ return nil
249
+ end
250
+ if have_not
251
+ operator = :not_within
252
+ else
253
+ operator = :within
254
+ end
255
+ range_list = parse_range_list
256
+ [operator, expr, range_list]
257
+ end
258
+
259
+ def parse_within
260
+ skip_whitespaces
261
+ @scanner.scan(/within/)
262
+ end
263
+
264
+ def parse_expr
265
+ operand = parse_operand
266
+ operator = parse_expr_operator
267
+ if operator
268
+ value = parse_value
269
+ if value.nil?
270
+ raise Error, "no value for #{operator}: #{@scanner.inspect}"
271
+ end
272
+ [operator, operand, value]
273
+ else
274
+ operand
275
+ end
276
+ end
277
+
278
+ def parse_operand
279
+ skip_whitespaces
280
+ @scanner.scan(/[niftvwce]/)
281
+ end
282
+
283
+ def parse_expr_operator
284
+ skip_whitespaces
285
+ if @scanner.scan(/(?:mod|%)/)
286
+ :mod
287
+ else
288
+ nil
289
+ end
290
+ end
291
+
292
+ def parse_range_list
293
+ ranges = [parse_range || parse_value]
294
+ loop do
295
+ skip_whitespaces
296
+ break unless @scanner.scan(/,/)
297
+ ranges << (parse_range || parse_value)
298
+ end
299
+ ranges
300
+ end
301
+
302
+ def parse_range
303
+ position = @scanner.pos
304
+ range_start = parse_value
305
+ skip_whitespaces
306
+ unless @scanner.scan(/\.\./)
307
+ @scanner.pos = position
308
+ return nil
309
+ end
310
+ range_end = parse_value
311
+ range_start..range_end
312
+ end
313
+
314
+ def parse_value
315
+ skip_whitespaces
316
+ value = @scanner.scan(/\d+/)
317
+ return nil if value.nil?
318
+ Integer(value, 10)
319
+ end
320
+
321
+ def parse_sample_list
322
+ samples = [parse_sample_range]
323
+ loop do
324
+ position = @scanner.pos
325
+ skip_whitespaces
326
+ break unless @scanner.scan(/,/)
327
+ sample_range = parse_sample_range
328
+ unless sample_range
329
+ @scanner.pos = position
330
+ break
331
+ end
332
+ samples << sample_range
333
+ end
334
+ skip_whitespaces
335
+ if @scanner.scan(/,/)
336
+ skip_whitespaces
337
+ # U+2026 HORIZONTAL ELLIPSIS
338
+ unless @scanner.scan(/\u2026|\.\.\./)
339
+ raise Error, "no ellipsis: #{@scanner.inspect}"
340
+ end
341
+ samples << :elipsis
342
+ end
343
+ samples
344
+ end
345
+
346
+ def parse_sample_range
347
+ value = parse_sample_value
348
+ return nil if value.nil?
349
+ skip_whitespaces
350
+ if @scanner.scan(/~/)
351
+ range_end = parse_sample_value
352
+ value..range_end
353
+ else
354
+ value
355
+ end
356
+ end
357
+
358
+ def parse_sample_value
359
+ value = parse_value
360
+ return nil if value.nil?
361
+ if @scanner.scan(/\./)
362
+ skip_whitespaces
363
+ decimal = @scanner.scan(/[0-9]+/)
364
+ if decimal.nil?
365
+ raise Error, "no decimal: #{@scanner.inspect}"
366
+ end
367
+ value += Float("0.#{decimal}")
368
+ skip_whitespaces
369
+ end
370
+ if @scanner.scan(/[ce]/)
371
+ # Workardoun for a spec bug. "e1" should be accepted.
372
+ #
373
+ # Spec:
374
+ # sampleValue = value ('.' digit+)? ([ce] digitPos digit+)?
375
+ # digit = [0-9]
376
+ # digitPos = [1-9]
377
+ e = @scanner.scan(/[1-9][0-9]*/)
378
+ value *= 10 * Integer(e, 10)
379
+ end
380
+ value
381
+ end
382
+ end
383
+ private_constant :RuleParser
384
+ end
385
+ end