red-datasets 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 120492172aae9cec1c4fc4f3b73575cb5349caf2f0b67d70676c8896324e1491
4
- data.tar.gz: e46eb3f2875cb407e86cc0976eff7d612beb62ca6b421a51435b5d5e1bfa6e03
3
+ metadata.gz: '0239c4ab86dd9f589b1f67b9d6c381570e25a29289c261470943ed48f7dfc3d0'
4
+ data.tar.gz: 2f3f3af1f17a1bd1e7aa307e2b182108790549754d907262105e18479997cde6
5
5
  SHA512:
6
- metadata.gz: 360bbf78c131f20a67359ddc2055cd58502da1f4e95adf30475cd405d5eb50be6ba4fd9aa0a0857226dc803e14282cc4231de113843e96657a65e287c7500137
7
- data.tar.gz: f88ed1ae8c8f0dad9f4d8904a265c833ceee723ba92860c0e3bed4c193d56a901c31184abd4290058de47fbc089b12b4d3b1da064f138214e2954d45eee928da
6
+ metadata.gz: 04b3dbc23dc8679855a6104a9f3da39871594979f149295ef13b3be864a3dbbdb6bec3fb59153db9b5be4fade6819686e13b60a38f1d1721bf7e1163d4bb49b8
7
+ data.tar.gz: 476a9081fe0db32aad8a4e00c7e08f77002e58a2f2c68eb37aecf2a70054d43877707ddda29137b361a5a37ff979f3c28c5f2a03c2d1e96cbc7f7289f659ba9f
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Red Datasets
2
2
 
3
+ [![Build Status](https://travis-ci.org/red-data-tools/red-datasets.svg?branch=master)](https://travis-ci.org/red-data-tools/red-datasets)
4
+ [![Gem Version](https://badge.fury.io/rb/red-datasets.svg)](https://badge.fury.io/rb/red-datasets)
5
+
3
6
  ## Description
4
7
 
5
8
  Red Datasets provides classes that provide common datasets such as iris dataset.
@@ -128,6 +131,9 @@ mnist.each do |record|
128
131
  end
129
132
  ```
130
133
 
134
+ ## NArray compatibility
135
+
136
+ * [red-datasets-numo-narray](https://github.com/red-data-tools/red-datasets-numo-narray)
131
137
 
132
138
  ## License
133
139
 
data/doc/text/news.md CHANGED
@@ -1,5 +1,30 @@
1
1
  # News
2
2
 
3
+ ## 0.1.1 - 2021-04-11
4
+
5
+ ### Improvements
6
+
7
+ * Added support for Ruby 3.0.
8
+
9
+ * `Datasets::Communities`: Added.
10
+ [GitHub#64][Patch by Yasuo Honda]
11
+
12
+ * `Datasets::EStatJapan`: Added.
13
+ [GitHub#90][Patch by Kunihiko Miyoshi]
14
+
15
+ * `Datasets::Penguins`: Added.
16
+ [GitHub#100][Patch by Kenta Murata]
17
+
18
+ * `Datasets::CLDRPlurals`: Added.
19
+
20
+ ### Thanks
21
+
22
+ * Yasuo Honda
23
+
24
+ * Kunihiko Miyoshi
25
+
26
+ * Kenta Murata
27
+
3
28
  ## 0.1.0 - 2020-02-04
4
29
 
5
30
  ### Improvements
data/lib/datasets.rb CHANGED
@@ -2,6 +2,9 @@ require_relative "datasets/version"
2
2
 
3
3
  require_relative "datasets/adult"
4
4
  require_relative "datasets/cifar"
5
+ require_relative "datasets/cldr-plurals"
6
+ require_relative "datasets/communities"
7
+ require_relative "datasets/e-stat-japan"
5
8
  require_relative "datasets/fashion-mnist"
6
9
  require_relative "datasets/hepatitis"
7
10
  require_relative "datasets/iris"
@@ -9,6 +12,7 @@ require_relative "datasets/libsvm"
9
12
  require_relative "datasets/libsvm-dataset-list"
10
13
  require_relative "datasets/mnist"
11
14
  require_relative "datasets/mushroom"
15
+ require_relative "datasets/penguins"
12
16
  require_relative "datasets/penn-treebank"
13
17
  require_relative "datasets/postal-code-japan"
14
18
  require_relative "datasets/wikipedia"
@@ -0,0 +1,385 @@
1
+ require "rexml/streamlistener"
2
+ require "rexml/parsers/baseparser"
3
+ require "rexml/parsers/streamparser"
4
+ require "strscan"
5
+
6
+ require_relative "dataset"
7
+
8
+ module Datasets
9
+ class CLDRPlurals < Dataset
10
# A locale name paired with the list of plural rules that apply to it.
Locale = Struct.new(:name, :rules)

# One plural rule: its count category, its parsed condition, and the
# integer and decimal sample lists given for it.
Rule = Struct.new(:count, :condition, :integer_samples, :decimal_samples)
17
+
18
# Sets the dataset metadata: identifier, display name, download URL,
# license list and description.
def initialize
  super()
  metadata = @metadata
  metadata.id = "cldr-plurals"
  metadata.name = "CLDR language plural rules"
  metadata.url = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/plurals.xml"
  metadata.licenses = ["Unicode-DFS-2016"]
  metadata.description = <<~DESCRIPTION
    Language plural rules in Unicode Common Locale Data Repository.
    See also: https://unicode-org.github.io/cldr-staging/charts/latest/supplemental/language_plural_rules.html
  DESCRIPTION
end
29
+
30
# Streams plurals.xml through a Listener, which calls the given block
# with Locale objects. Returns an Enumerator when no block is given.
def each(&block)
  unless block_given?
    return to_enum(__method__)
  end

  open_data do |xml|
    catch do |tag|
      handler = Listener.new(tag, &block)
      REXML::Parsers::StreamParser.new(xml, handler).parse
    end
  end
end
41
+
42
+ private
43
# Downloads plurals.xml into the cache directory when it is not there
# yet, then opens the cached file and yields the open file to the
# caller's block.
def open_data
  data_path = cache_dir_path + "plurals.xml"
  download(data_path, @metadata.url) unless data_path.exist?
  ::File.open(data_path) { |input| yield(input) }
end
52
+
53
# Spec: https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
#
# REXML stream listener that collects the <pluralRule> children of each
# <pluralRules> element and, when that element closes, calls the block
# once per name listed in its "locales" attribute.
class Listener
  include REXML::StreamListener

  # abort_tag: stored as given; the block receives Locale objects.
  def initialize(abort_tag, &block)
    @abort_tag = abort_tag
    @block = block
    @tag_name_stack = []
  end

  # Tracks the open element and starts a fresh rule list or rule.
  def tag_start(name, attributes)
    @tag_name_stack.push(name)
    if name == "pluralRules"
      @locales = attributes["locales"].split
      @rules = []
    elsif name == "pluralRule"
      @rule = Rule.new(attributes["count"])
    end
  end

  # Emits the collected rules for every locale when <pluralRules> closes;
  # all emitted Locale objects share the same rules array.
  def tag_end(name)
    if name == "pluralRules"
      @locales.each { |locale_name| @block.call(Locale.new(locale_name, @rules)) }
    elsif name == "pluralRule"
      @rules << @rule
    end
    @tag_name_stack.pop
  end

  # Only text directly inside <pluralRule> is rule text.
  def text(data)
    parse_plural_rule(data) if @tag_name_stack.last == "pluralRule"
  end

  private
  def parse_plural_rule(data)
    RuleParser.new(@rule, data).parse
  end
end
99
+ private_constant :Listener
100
+
101
# Syntax: http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax
#
# Parser for the text of a single plural rule, for example
# "i = 1 and v = 0 @integer 1". The parsed pieces are stored on the rule
# object passed to the constructor:
#
# * condition: nil when the text has no condition, otherwise a nested
#   array such as [:and, [:equal, "i", [1]], [:equal, "v", [0]]].
# * integer_samples / decimal_samples: arrays of numbers, ranges and the
#   :elipsis marker; left untouched when the section is absent.
class RuleParser
  # rule: object that accepts condition=, integer_samples= and
  #       decimal_samples=.
  # data: the rule text to parse.
  def initialize(rule, data)
    @rule = rule
    @data = data
    @scanner = StringScanner.new(@data)
  end

  # Parses the condition, then the optional "@integer" and "@decimal"
  # sample lists, in that order.
  def parse
    @rule.condition = parse_condition
    skip_whitespaces
    if @scanner.scan(/@integer/)
      @rule.integer_samples = parse_sample_list
    end
    skip_whitespaces
    if @scanner.scan(/@decimal/)
      @rule.decimal_samples = parse_sample_list
    end
  end

  private
  def skip_whitespaces
    @scanner.skip(/\p{Pattern_White_Space}+/)
  end

  # Returns nil for an empty condition, the single and-condition when
  # there is no "or", or [:or, *and_conditions] otherwise.
  def parse_condition
    and_condition = parse_and_condition
    return nil if and_condition.nil?
    and_conditions = [and_condition]
    while parse_or
      and_conditions << parse_and_condition
    end
    if and_conditions.size == 1
      and_condition
    else
      [:or, *and_conditions]
    end
  end

  def parse_or
    skip_whitespaces
    @scanner.scan(/or/)
  end

  # Returns nil when no relation is present, the single relation when
  # there is no "and", or [:and, *relations] otherwise.
  def parse_and_condition
    skip_whitespaces
    relation = parse_relation
    return nil if relation.nil?
    relations = [relation]
    while parse_and
      relations << parse_relation
    end
    if relations.size == 1
      relation
    else
      [:and, *relations]
    end
  end

  def parse_and
    skip_whitespaces
    @scanner.scan(/and/)
  end

  # Tries each relation form in turn. Every form restores the scanner
  # position when it does not match, so the next form starts from the
  # same place.
  def parse_relation
    parse_is_relation ||
      parse_in_relation ||
      parse_within_relation
  end

  # expr "is" ["not"] value -> [:is or :is_not, expr, value]
  #
  # Raises Error when "is" is not followed by a value.
  # NOTE(review): Error is not defined in this class; presumably it
  # resolves to an error class of the enclosing namespace - confirm.
  def parse_is_relation
    position = @scanner.pos
    skip_whitespaces
    expr = parse_expr
    unless parse_is
      @scanner.pos = position
      return nil
    end
    if parse_not
      operator = :is_not
    else
      operator = :is
    end
    value = parse_value
    if value.nil?
      raise Error, "no value for #{operator}: #{@scanner.inspect}"
    end
    [operator, expr, value]
  end

  def parse_is
    skip_whitespaces
    @scanner.scan(/is/)
  end

  def parse_not
    skip_whitespaces
    @scanner.scan(/not/)
  end

  # expr ("not in" | "in" | "=" | "!=") range_list
  #   -> [:not_in, :in, :equal or :not_equal, expr, range_list]
  def parse_in_relation
    position = @scanner.pos
    skip_whitespaces
    expr = parse_expr
    if parse_not
      if parse_in
        operator = :not_in
      else
        # "not" without "in" (e.g. "not within"): rewind so that
        # parse_within_relation can try from the start of the relation.
        @scanner.pos = position
        return nil
      end
    elsif parse_in
      operator = :in
    elsif parse_equal
      operator = :equal
    elsif parse_not_equal
      operator = :not_equal
    else
      @scanner.pos = position
      return nil
    end
    range_list = parse_range_list
    [operator, expr, range_list]
  end

  def parse_in
    skip_whitespaces
    @scanner.scan(/in/)
  end

  def parse_equal
    skip_whitespaces
    @scanner.scan(/=/)
  end

  def parse_not_equal
    skip_whitespaces
    @scanner.scan(/!=/)
  end

  # expr ["not"] "within" range_list
  #   -> [:within or :not_within, expr, range_list]
  def parse_within_relation
    position = @scanner.pos
    skip_whitespaces
    expr = parse_expr
    have_not = parse_not
    unless parse_within
      @scanner.pos = position
      return nil
    end
    if have_not
      operator = :not_within
    else
      operator = :within
    end
    range_list = parse_range_list
    [operator, expr, range_list]
  end

  def parse_within
    skip_whitespaces
    @scanner.scan(/within/)
  end

  # operand [("mod" | "%") value]
  #
  # Returns the operand string alone (nil when there is none), or
  # [:mod, operand, value]. Raises Error when the modulus value is
  # missing.
  def parse_expr
    operand = parse_operand
    operator = parse_expr_operator
    if operator
      value = parse_value
      if value.nil?
        raise Error, "no value for #{operator}: #{@scanner.inspect}"
      end
      [operator, operand, value]
    else
      operand
    end
  end

  # Operands are the single letters n, i, f, t, v, w, c and e.
  def parse_operand
    skip_whitespaces
    @scanner.scan(/[niftvwce]/)
  end

  def parse_expr_operator
    skip_whitespaces
    if @scanner.scan(/(?:mod|%)/)
      :mod
    else
      nil
    end
  end

  # Comma-separated list whose entries are Ranges ("1..3") or Integers.
  def parse_range_list
    ranges = [parse_range || parse_value]
    loop do
      skip_whitespaces
      break unless @scanner.scan(/,/)
      ranges << (parse_range || parse_value)
    end
    ranges
  end

  # value ".." value -> Range; rewinds and returns nil when ".." is
  # absent so the caller can re-read the text as a plain value.
  def parse_range
    position = @scanner.pos
    range_start = parse_value
    skip_whitespaces
    unless @scanner.scan(/\.\./)
      @scanner.pos = position
      return nil
    end
    range_end = parse_value
    range_start..range_end
  end

  # Returns the next run of digits as an Integer, or nil.
  def parse_value
    skip_whitespaces
    value = @scanner.scan(/\d+/)
    return nil if value.nil?
    Integer(value, 10)
  end

  # Comma-separated sample ranges, optionally ending with ", ..." or
  # ", U+2026", which is recorded as :elipsis. The spelling of that
  # symbol is part of the returned data and is kept as is.
  #
  # Raises RuntimeError when a trailing comma is not followed by an
  # ellipsis.
  def parse_sample_list
    samples = [parse_sample_range]
    loop do
      position = @scanner.pos
      skip_whitespaces
      break unless @scanner.scan(/,/)
      sample_range = parse_sample_range
      unless sample_range
        # The comma belongs to the trailing ellipsis: give it back.
        @scanner.pos = position
        break
      end
      samples << sample_range
    end
    skip_whitespaces
    if @scanner.scan(/,/)
      skip_whitespaces
      # U+2026 HORIZONTAL ELLIPSIS
      unless @scanner.scan(/\u2026|\.\.\./)
        raise "no ellipsis: #{@scanner.inspect}"
      end
      samples << :elipsis
    end
    samples
  end

  # sample_value ["~" sample_value] -> a single value or a Range.
  def parse_sample_range
    value = parse_sample_value
    return nil if value.nil?
    skip_whitespaces
    if @scanner.scan(/~/)
      range_end = parse_sample_value
      value..range_end
    else
      value
    end
  end

  # Parses an integer part, an optional ".digits" fraction (the result
  # then becomes a Float) and an optional "c"/"e" power-of-ten exponent.
  #
  # Raises RuntimeError when "." or the exponent marker is not followed
  # by digits.
  def parse_sample_value
    value = parse_value
    return nil if value.nil?
    if @scanner.scan(/\./)
      skip_whitespaces
      decimal = @scanner.scan(/[0-9]+/)
      if decimal.nil?
        raise "no decimal: #{@scanner.inspect}"
      end
      value += Float("0.#{decimal}")
      skip_whitespaces
    end
    if @scanner.scan(/[ce]/)
      # Workaround for a spec bug. "e1" should be accepted.
      #
      # Spec:
      #   sampleValue = value ('.' digit+)? ([ce] digitPos digit+)?
      #   digit       = [0-9]
      #   digitPos    = [1-9]
      exponent = @scanner.scan(/[1-9][0-9]*/)
      if exponent.nil?
        raise "no exponent: #{@scanner.inspect}"
      end
      # The exponent is a power of ten: "1c3" is 1000.
      value *= 10**Integer(exponent, 10)
    end
    value
  end
end
383
+ private_constant :RuleParser
384
+ end
385
+ end