red-datasets 0.0.7 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +20 -4
- data/doc/text/news.md +102 -0
- data/lib/datasets.rb +19 -9
- data/lib/datasets/adult.rb +4 -3
- data/lib/datasets/cifar.rb +4 -12
- data/lib/datasets/cldr-plurals.rb +385 -0
- data/lib/datasets/communities.rb +198 -0
- data/lib/datasets/dataset.rb +20 -1
- data/lib/datasets/downloader.rb +54 -26
- data/lib/datasets/e-stat-japan.rb +320 -0
- data/lib/datasets/error.rb +4 -0
- data/lib/datasets/hepatitis.rb +207 -0
- data/lib/datasets/libsvm-dataset-list.rb +277 -0
- data/lib/datasets/libsvm.rb +135 -0
- data/lib/datasets/mnist.rb +0 -2
- data/lib/datasets/mushroom.rb +256 -0
- data/lib/datasets/penguins.rb +146 -0
- data/lib/datasets/postal-code-japan.rb +154 -0
- data/lib/datasets/rdatasets.rb +95 -0
- data/lib/datasets/table.rb +83 -3
- data/lib/datasets/tar_gz_readable.rb +14 -0
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets/wikipedia.rb +2 -10
- data/red-datasets.gemspec +4 -0
- data/test/run-test.rb +2 -0
- data/test/test-cldr-plurals.rb +180 -0
- data/test/test-communities.rb +290 -0
- data/test/test-dataset.rb +27 -0
- data/test/test-downloader.rb +29 -0
- data/test/test-e-stat-japan.rb +383 -0
- data/test/test-hepatitis.rb +74 -0
- data/test/test-libsvm-dataset-list.rb +47 -0
- data/test/test-libsvm.rb +205 -0
- data/test/test-mushroom.rb +80 -0
- data/test/test-penguins.rb +251 -0
- data/test/test-postal-code-japan.rb +69 -0
- data/test/test-rdatasets.rb +136 -0
- data/test/test-table.rb +123 -18
- metadata +88 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6fbd4d11063f89ba2e09250b751886086c953ec8bc92c75a6a351c31a36da0c4
|
4
|
+
data.tar.gz: acc6ff31f0f4ae3a6c6565fe569233c01615718c01300b0838ff744571edc34d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 26361511155b447ffed56a79b2336a9a1db96494bf856b23e7b39cc6a8b6a2039e7ed27564140761bdb2daaae7ee563b3695c464a7a7b21ff93b0636f6b8338d
|
7
|
+
data.tar.gz: 40446f90e410e0d86abeec186a1d7adcc5375e29c19dc934f823befb26a87d904458ef5ea18c9d64055493d29ed305dba53d6e4d86bd7d84488baf3745ebd792
|
data/README.md
CHANGED
@@ -1,8 +1,7 @@
|
|
1
|
-
#
|
1
|
+
# Red Datasets
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
Red Datasets
|
3
|
+
[![Build Status](https://travis-ci.org/red-data-tools/red-datasets.svg?branch=master)](https://travis-ci.org/red-data-tools/red-datasets)
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/red-datasets.svg)](https://badge.fury.io/rb/red-datasets)
|
6
5
|
|
7
6
|
## Description
|
8
7
|
|
@@ -16,6 +15,20 @@ You can use datasets easily because you can access each dataset with multiple wa
|
|
16
15
|
% gem install red-datasets
|
17
16
|
```
|
18
17
|
|
18
|
+
## Available datasets
|
19
|
+
|
20
|
+
TODO: Document them in the source code so that they are listed in the API documentation: https://www.rubydoc.info/gems/red-datasets
|
21
|
+
|
22
|
+
* Adult Dataset
|
23
|
+
* CIFAR-10 Dataset
|
24
|
+
* CIFAR-100 Dataset
|
25
|
+
* Fashion-MNIST
|
26
|
+
* Iris Dataset
|
27
|
+
* MNIST database
|
28
|
+
* The Penn Treebank Project
|
29
|
+
* Wikipedia
|
30
|
+
* Wine Dataset
|
31
|
+
|
19
32
|
## Usage
|
20
33
|
|
21
34
|
Here is an example to access [Iris Data Set](https://archive.ics.uci.edu/ml/datasets/iris) by `#each` or `Table#to_h` or `Table#fetch_values`.
|
@@ -118,6 +131,9 @@ mnist.each do |record|
|
|
118
131
|
end
|
119
132
|
```
|
120
133
|
|
134
|
+
## NArray compatibility
|
135
|
+
|
136
|
+
* [red-datasets-numo-narray](https://github.com/red-data-tools/red-datasets-numo-narray)
|
121
137
|
|
122
138
|
## License
|
123
139
|
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,107 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.1.2 - 2021-06-03
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* `Datasets::Rdatasets` and `Datasets::RdatasetsList`: Added.
|
8
|
+
|
9
|
+
* `Datasets::Penguins`: Changed for compatibility with seaborn's
|
10
|
+
penguins dataset.
|
11
|
+
|
12
|
+
## 0.1.1 - 2021-04-11
|
13
|
+
|
14
|
+
### Improvements
|
15
|
+
|
16
|
+
* Added support for Ruby 3.0.
|
17
|
+
|
18
|
+
* `Datasets::Communities`: Added.
|
19
|
+
[GitHub#64][Patch by Yasuo Honda]
|
20
|
+
|
21
|
+
* `Datasets::EStatJapan`: Added.
|
22
|
+
[GitHub#90][Patch by Kunihiko Miyoshi]
|
23
|
+
|
24
|
+
* `Datasets::Penguins`: Added.
|
25
|
+
[GitHub#100][Patch by Kenta Murata]
|
26
|
+
|
27
|
+
* `Datasets::CLDRPlurals`: Added.
|
28
|
+
|
29
|
+
### Thanks
|
30
|
+
|
31
|
+
* Yasuo Honda
|
32
|
+
|
33
|
+
* Kunihiko Miyoshi
|
34
|
+
|
35
|
+
* Kenta Murata
|
36
|
+
|
37
|
+
## 0.1.0 - 2020-02-04
|
38
|
+
|
39
|
+
### Improvements
|
40
|
+
|
41
|
+
* Added support for Ruby 2.7.
|
42
|
+
[GitHub#82][GitHub#83][Patch by Yasuo Honda]
|
43
|
+
|
44
|
+
* `Datasets::Hepatitis`: Added.
|
45
|
+
[GitHub#70][Patch by KazuhiroYoshimoto]
|
46
|
+
|
47
|
+
* `Datasets::Downloader`: Added support for query.
|
48
|
+
|
49
|
+
### Thanks
|
50
|
+
|
51
|
+
* Yasuo Honda
|
52
|
+
|
53
|
+
* KazuhiroYoshimoto
|
54
|
+
|
55
|
+
## 0.0.9 - 2019-09-09
|
56
|
+
|
57
|
+
### Improvements
|
58
|
+
|
59
|
+
* `Datasets::LIBSVMDatasetList`: Improved performance.
|
60
|
+
|
61
|
+
* `Datasets::Mushroom`: Added.
|
62
|
+
[GitHub#33][Patch by Yasuo Honda]
|
63
|
+
|
64
|
+
* `Datasets::Table#n_columns`: Added.
|
65
|
+
|
66
|
+
* `Datasets::Table#n_rows`: Added.
|
67
|
+
|
68
|
+
* `Datasets::Table#[]`: Added support for index access.
|
69
|
+
|
70
|
+
* `Datasets::Table#column_names`: Added.
|
71
|
+
|
72
|
+
* `Datasets::Table#size`: Added.
|
73
|
+
|
74
|
+
* `Datasets::Table#length`: Added.
|
75
|
+
|
76
|
+
* `Datasets::Table#each_column`: Added.
|
77
|
+
|
78
|
+
* `Datasets::Table#each_record`: Added.
|
79
|
+
|
80
|
+
* `Datasets::Table#find_record`: Added.
|
81
|
+
|
82
|
+
### Thanks
|
83
|
+
|
84
|
+
* Yasuo Honda
|
85
|
+
|
86
|
+
### Improvements
|
87
|
+
|
88
|
+
## 0.0.8 - 2019-03-24
|
89
|
+
|
90
|
+
### Improvements
|
91
|
+
|
92
|
+
* Improved README.
|
93
|
+
[GitHub#40][Patch by kojix2]
|
94
|
+
|
95
|
+
* `Datasets::PostalCodeJapan`: Added.
|
96
|
+
|
97
|
+
* `Datasets::LIBSVMDatasetList`: Added.
|
98
|
+
|
99
|
+
* `Datasets::LIBSVM`: Added.
|
100
|
+
|
101
|
+
### Thanks
|
102
|
+
|
103
|
+
* kojix2
|
104
|
+
|
3
105
|
## 0.0.7 - 2018-11-21
|
4
106
|
|
5
107
|
### Improvements
|
data/lib/datasets.rb
CHANGED
@@ -1,10 +1,20 @@
|
|
1
|
-
|
1
|
+
require_relative "datasets/version"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
3
|
+
require_relative "datasets/adult"
|
4
|
+
require_relative "datasets/cifar"
|
5
|
+
require_relative "datasets/cldr-plurals"
|
6
|
+
require_relative "datasets/communities"
|
7
|
+
require_relative "datasets/e-stat-japan"
|
8
|
+
require_relative "datasets/fashion-mnist"
|
9
|
+
require_relative "datasets/hepatitis"
|
10
|
+
require_relative "datasets/iris"
|
11
|
+
require_relative "datasets/libsvm"
|
12
|
+
require_relative "datasets/libsvm-dataset-list"
|
13
|
+
require_relative "datasets/mnist"
|
14
|
+
require_relative "datasets/mushroom"
|
15
|
+
require_relative "datasets/penguins"
|
16
|
+
require_relative "datasets/penn-treebank"
|
17
|
+
require_relative "datasets/postal-code-japan"
|
18
|
+
require_relative "datasets/rdatasets"
|
19
|
+
require_relative "datasets/wikipedia"
|
20
|
+
require_relative "datasets/wine"
|
data/lib/datasets/adult.rb
CHANGED
@@ -62,11 +62,12 @@ module Datasets
|
|
62
62
|
data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.#{ext}"
|
63
63
|
download(data_path, data_url)
|
64
64
|
end
|
65
|
-
|
66
|
-
|
65
|
+
|
66
|
+
options = {
|
67
67
|
converters: [:numeric, lambda {|f| f.strip}],
|
68
68
|
skip_lines: /\A\|/,
|
69
|
-
|
69
|
+
}
|
70
|
+
CSV.open(data_path, **options) do |csv|
|
70
71
|
yield(csv)
|
71
72
|
end
|
72
73
|
end
|
data/lib/datasets/cifar.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
|
2
|
-
require "zlib"
|
3
|
-
|
1
|
+
require_relative "tar_gz_readable"
|
4
2
|
require_relative "dataset"
|
5
3
|
|
6
4
|
module Datasets
|
7
5
|
class CIFAR < Dataset
|
6
|
+
include TarGzReadable
|
7
|
+
|
8
8
|
module Pixelable
|
9
9
|
def pixels
|
10
10
|
data.unpack("C*")
|
@@ -61,7 +61,7 @@ module Datasets
|
|
61
61
|
private
|
62
62
|
|
63
63
|
def parse_data(data_path, &block)
|
64
|
-
|
64
|
+
open_tar_gz(data_path) do |tar|
|
65
65
|
target_file_names.each do |target_file_name|
|
66
66
|
tar.seek(target_file_name) do |entry|
|
67
67
|
parse_entry(entry, &block)
|
@@ -124,14 +124,6 @@ module Datasets
|
|
124
124
|
end
|
125
125
|
end
|
126
126
|
end
|
127
|
-
|
128
|
-
def open_tar(data_path)
|
129
|
-
Zlib::GzipReader.open(data_path) do |f|
|
130
|
-
Gem::Package::TarReader.new(f) do |tar|
|
131
|
-
yield(tar)
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
135
127
|
end
|
136
128
|
end
|
137
129
|
|
@@ -0,0 +1,385 @@
|
|
1
|
+
require "rexml/streamlistener"
|
2
|
+
require "rexml/parsers/baseparser"
|
3
|
+
require "rexml/parsers/streamparser"
|
4
|
+
require "strscan"
|
5
|
+
|
6
|
+
require_relative "dataset"
|
7
|
+
|
8
|
+
module Datasets
|
9
|
+
class CLDRPlurals < Dataset
|
10
|
+
Locale = Struct.new(:name,
|
11
|
+
:rules)
|
12
|
+
|
13
|
+
Rule = Struct.new(:count,
|
14
|
+
:condition,
|
15
|
+
:integer_samples,
|
16
|
+
:decimal_samples)
|
17
|
+
|
18
|
+
# Fills in the dataset metadata (id, name, source URL, license and
# description) for the CLDR plural rules dataset.
#
# NOTE(review): @metadata is presumably created by the superclass
# constructor invoked through super() -- confirm in dataset.rb.
def initialize
  super()
  @metadata.tap do |metadata|
    metadata.id = "cldr-plurals"
    metadata.name = "CLDR language plural rules"
    metadata.url = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/plurals.xml"
    metadata.licenses = ["Unicode-DFS-2016"]
    metadata.description = <<~DESCRIPTION
      Language plural rules in Unicode Common Locale Data Repository.
      See also: https://unicode-org.github.io/cldr-staging/charts/latest/supplemental/language_plural_rules.html
    DESCRIPTION
  end
end
|
29
|
+
|
30
|
+
# Streams the plural rules XML and passes one Locale record per locale
# name to the given block.
#
# Without a block, an Enumerator over the same records is returned.
def each(&block)
  unless block_given?
    return enum_for(:each)
  end

  open_data do |xml|
    # The catch tag is handed to the listener when it is constructed.
    catch do |abort_tag|
      handler = Listener.new(abort_tag, &block)
      REXML::Parsers::StreamParser.new(xml, handler).parse
    end
  end
end
|
41
|
+
|
42
|
+
private
|
43
|
+
# Yields an open File for the cached "plurals.xml", downloading it from
# the metadata URL first when the cache file does not exist yet.
#
# The file is closed when the block returns.
def open_data
  xml_path = cache_dir_path + "plurals.xml"
  download(xml_path, @metadata.url) unless xml_path.exist?
  ::File.open(xml_path) { |input| yield(input) }
end
|
52
|
+
|
53
|
+
# Spec: https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
|
54
|
+
# Stream listener that converts <pluralRules>/<pluralRule> elements into
# Locale records.
#
# For every closing </pluralRules> element the block given to the
# constructor is called once per locale name listed in the element's
# "locales" attribute. All of those Locale records share the same rules
# array.
class Listener
  include REXML::StreamListener

  # abort_tag is stored but not used by any method of this class.
  def initialize(abort_tag, &block)
    @abort_tag = abort_tag
    @block = block
    @tag_name_stack = []
  end

  # Tracks the element nesting and starts a fresh rule list / rule.
  def tag_start(name, attributes)
    @tag_name_stack.push(name)
    if name == "pluralRules"
      @locales = attributes["locales"].split
      @rules = []
    elsif name == "pluralRule"
      @rule = Rule.new(attributes["count"])
    end
  end

  # Emits the collected locales or stores the finished rule, then leaves
  # the element.
  def tag_end(name)
    if name == "pluralRules"
      @locales.each do |locale_name|
        @block.call(Locale.new(locale_name, @rules))
      end
    elsif name == "pluralRule"
      @rules << @rule
    end
    @tag_name_stack.pop
  end

  # Only text directly inside <pluralRule> is parsed; text anywhere else
  # is ignored.
  def text(data)
    return unless @tag_name_stack.last == "pluralRule"

    RuleParser.new(@rule, data).parse
  end
end
|
99
|
+
private_constant :Listener
|
100
|
+
|
101
|
+
# Syntax: http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax
|
102
|
+
# Recursive-descent parser for the text of one <pluralRule> element.
#
# The parsed result is written into the rule object given to the
# constructor:
#
# * condition: nil when the text has no condition, a single relation,
#   [:and, relation, ...] or [:or, and_condition, ...].
#   A relation is [operator, expr, value_or_range_list] where expr is an
#   operand string ("n", "i", ...) or [:mod, operand, Integer].
# * integer_samples / decimal_samples: arrays of numbers and ranges,
#   terminated by :elipsis (sic) when the sample list ends with an
#   ellipsis. The symbol spelling is kept because callers see it.
class RuleParser
  # rule must respond to condition=, integer_samples= and
  # decimal_samples=. data is the rule text to parse.
  def initialize(rule, data)
    @rule = rule
    @data = data
    @scanner = StringScanner.new(@data)
  end

  # Parses the condition and the optional "@integer" / "@decimal" sample
  # lists and stores them into the rule.
  def parse
    @rule.condition = parse_condition
    skip_whitespaces
    if @scanner.scan(/@integer/)
      @rule.integer_samples = parse_sample_list
    end
    skip_whitespaces
    if @scanner.scan(/@decimal/)
      @rule.decimal_samples = parse_sample_list
    end
  end

  private
  def skip_whitespaces
    @scanner.skip(/\p{Pattern_White_Space}+/)
  end

  # condition = and_condition ('or' and_condition)*
  def parse_condition
    and_condition = parse_and_condition
    return nil if and_condition.nil?
    and_conditions = [and_condition]
    while parse_or
      and_conditions << parse_and_condition
    end
    if and_conditions.size == 1
      and_condition
    else
      [:or, *and_conditions]
    end
  end

  def parse_or
    skip_whitespaces
    @scanner.scan(/or/)
  end

  # and_condition = relation ('and' relation)*
  def parse_and_condition
    skip_whitespaces
    relation = parse_relation
    return nil if relation.nil?
    relations = [relation]
    while parse_and
      relations << parse_relation
    end
    if relations.size == 1
      relation
    else
      [:and, *relations]
    end
  end

  def parse_and
    skip_whitespaces
    @scanner.scan(/and/)
  end

  # Tries each relation form in turn. Every alternative rewinds the
  # scanner when it does not match so that the next one starts at the
  # same position.
  def parse_relation
    parse_is_relation ||
      parse_in_relation ||
      parse_within_relation
  end

  # is_relation = expr 'is' ('not')? value
  def parse_is_relation
    position = @scanner.pos
    skip_whitespaces
    expr = parse_expr
    unless parse_is
      @scanner.pos = position
      return nil
    end
    if parse_not
      operator = :is_not
    else
      operator = :is
    end
    value = parse_value
    if value.nil?
      raise Error.new("no value for #{operator}: #{@scanner.inspect}")
    end
    [operator, expr, value]
  end

  def parse_is
    skip_whitespaces
    @scanner.scan(/is/)
  end

  def parse_not
    skip_whitespaces
    @scanner.scan(/not/)
  end

  # in_relation = expr (('not')? 'in' | '=' | '!=') range_list
  def parse_in_relation
    position = @scanner.pos
    skip_whitespaces
    expr = parse_expr
    if parse_not
      if parse_in
        operator = :not_in
      else
        # "not" without "in" (e.g. "not within"): rewind so that
        # parse_within_relation can try from the same position.
        @scanner.pos = position
        return nil
      end
    elsif parse_in
      operator = :in
    elsif parse_equal
      operator = :equal
    elsif parse_not_equal
      operator = :not_equal
    else
      @scanner.pos = position
      return nil
    end
    range_list = parse_range_list
    [operator, expr, range_list]
  end

  def parse_in
    skip_whitespaces
    @scanner.scan(/in/)
  end

  def parse_equal
    skip_whitespaces
    @scanner.scan(/=/)
  end

  def parse_not_equal
    skip_whitespaces
    @scanner.scan(/!=/)
  end

  # within_relation = expr ('not')? 'within' range_list
  def parse_within_relation
    position = @scanner.pos
    skip_whitespaces
    expr = parse_expr
    have_not = parse_not
    unless parse_within
      @scanner.pos = position
      return nil
    end
    if have_not
      operator = :not_within
    else
      operator = :within
    end
    range_list = parse_range_list
    [operator, expr, range_list]
  end

  def parse_within
    skip_whitespaces
    @scanner.scan(/within/)
  end

  # expr = operand (('mod' | '%') value)?
  #
  # Returns the operand string, [:mod, operand, value], or nil when no
  # operand is present.
  def parse_expr
    operand = parse_operand
    operator = parse_expr_operator
    if operator
      value = parse_value
      if value.nil?
        raise Error.new("no value for #{operator}: #{@scanner.inspect}")
      end
      [operator, operand, value]
    else
      operand
    end
  end

  def parse_operand
    skip_whitespaces
    @scanner.scan(/[niftvwce]/)
  end

  def parse_expr_operator
    skip_whitespaces
    if @scanner.scan(/(?:mod|%)/)
      :mod
    else
      nil
    end
  end

  # range_list = (range | value) (',' (range | value))*
  def parse_range_list
    ranges = [parse_range || parse_value]
    loop do
      skip_whitespaces
      break unless @scanner.scan(/,/)
      ranges << (parse_range || parse_value)
    end
    ranges
  end

  # range = value '..' value
  #
  # Rewinds and returns nil when no ".." follows the first value.
  def parse_range
    position = @scanner.pos
    range_start = parse_value
    skip_whitespaces
    unless @scanner.scan(/\.\./)
      @scanner.pos = position
      return nil
    end
    range_end = parse_value
    range_start..range_end
  end

  # value = digit+ (returned as an Integer, nil when absent)
  def parse_value
    skip_whitespaces
    value = @scanner.scan(/\d+/)
    return nil if value.nil?
    Integer(value, 10)
  end

  # sample_list = sample_range (',' sample_range)* (',' ellipsis)?
  def parse_sample_list
    samples = [parse_sample_range]
    loop do
      position = @scanner.pos
      skip_whitespaces
      break unless @scanner.scan(/,/)
      sample_range = parse_sample_range
      unless sample_range
        # The comma belongs to the trailing ellipsis: give it back.
        @scanner.pos = position
        break
      end
      samples << sample_range
    end
    skip_whitespaces
    if @scanner.scan(/,/)
      skip_whitespaces
      # U+2026 HORIZONTAL ELLIPSIS
      unless @scanner.scan(/\u2026|\.\.\./)
        raise "no ellipsis: #{@scanner.inspect}"
      end
      samples << :elipsis
    end
    samples
  end

  # sample_range = sample_value ('~' sample_value)?
  def parse_sample_range
    value = parse_sample_value
    return nil if value.nil?
    skip_whitespaces
    if @scanner.scan(/~/)
      range_end = parse_sample_value
      value..range_end
    else
      value
    end
  end

  # Parses one sample value such as "3", "1.5", "1c6" or "1.2e3".
  #
  # Returns an Integer, or a Float when a decimal part is present, or
  # nil when there is no value at the current position. Raises when a
  # "." or an exponent marker is not followed by digits.
  def parse_sample_value
    value = parse_value
    return nil if value.nil?
    if @scanner.scan(/\./)
      skip_whitespaces
      decimal = @scanner.scan(/[0-9]+/)
      if decimal.nil?
        raise "no decimal: #{@scanner.inspect}"
      end
      # Parse the whole literal at once instead of adding the integer
      # and fractional parts, which could be off by a rounding step.
      value = Float("#{value}.#{decimal}")
      skip_whitespaces
    end
    if @scanner.scan(/[ce]/)
      # Workaround for a spec bug. "e1" should be accepted.
      #
      # Spec:
      #   sampleValue = value ('.' digit+)? ([ce] digitPos digit+)?
      #   digit       = [0-9]
      #   digitPos    = [1-9]
      e = @scanner.scan(/[1-9][0-9]*/)
      if e.nil?
        raise "no exponent: #{@scanner.inspect}"
      end
      # "1c6" means 1 x 10^6: the digits are a power of ten.
      value *= 10**Integer(e, 10)
    end
    value
  end
end
|
383
|
+
private_constant :RuleParser
|
384
|
+
end
|
385
|
+
end
|