red-datasets 0.0.6 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -7
- data/doc/text/news.md +124 -0
- data/lib/datasets.rb +18 -6
- data/lib/datasets/adult.rb +84 -0
- data/lib/datasets/cldr-plurals.rb +385 -0
- data/lib/datasets/communities.rb +198 -0
- data/lib/datasets/dataset.rb +13 -0
- data/lib/datasets/dictionary.rb +59 -0
- data/lib/datasets/downloader.rb +37 -62
- data/lib/datasets/e-stat-japan.rb +320 -0
- data/lib/datasets/error.rb +4 -0
- data/lib/datasets/fashion-mnist.rb +12 -0
- data/lib/datasets/hepatitis.rb +207 -0
- data/lib/datasets/iris.rb +1 -1
- data/lib/datasets/libsvm-dataset-list.rb +277 -0
- data/lib/datasets/libsvm.rb +135 -0
- data/lib/datasets/mnist.rb +11 -8
- data/lib/datasets/mushroom.rb +256 -0
- data/lib/datasets/penguins.rb +125 -0
- data/lib/datasets/penn-treebank.rb +2 -9
- data/lib/datasets/postal-code-japan.rb +154 -0
- data/lib/datasets/table.rb +99 -3
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets/wikipedia.rb +2 -10
- data/lib/datasets/wine.rb +64 -0
- data/red-datasets.gemspec +4 -0
- data/test/helper.rb +1 -0
- data/test/run-test.rb +2 -0
- data/test/test-adult.rb +126 -0
- data/test/test-cldr-plurals.rb +180 -0
- data/test/test-communities.rb +290 -0
- data/test/test-dictionary.rb +43 -0
- data/test/test-e-stat-japan.rb +383 -0
- data/test/test-fashion-mnist.rb +137 -0
- data/test/test-hepatitis.rb +74 -0
- data/test/test-libsvm-dataset-list.rb +47 -0
- data/test/test-libsvm.rb +205 -0
- data/test/test-mnist.rb +95 -70
- data/test/test-mushroom.rb +80 -0
- data/test/test-penguins.rb +239 -0
- data/test/test-penn-treebank.rb +6 -6
- data/test/test-postal-code-japan.rb +69 -0
- data/test/test-table.rb +144 -19
- data/test/test-wine.rb +58 -0
- metadata +89 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0239c4ab86dd9f589b1f67b9d6c381570e25a29289c261470943ed48f7dfc3d0'
|
4
|
+
data.tar.gz: 2f3f3af1f17a1bd1e7aa307e2b182108790549754d907262105e18479997cde6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04b3dbc23dc8679855a6104a9f3da39871594979f149295ef13b3be864a3dbbdb6bec3fb59153db9b5be4fade6819686e13b60a38f1d1721bf7e1163d4bb49b8
|
7
|
+
data.tar.gz: 476a9081fe0db32aad8a4e00c7e08f77002e58a2f2c68eb37aecf2a70054d43877707ddda29137b361a5a37ff979f3c28c5f2a03c2d1e96cbc7f7289f659ba9f
|
data/README.md
CHANGED
@@ -1,8 +1,7 @@
|
|
1
|
-
#
|
1
|
+
# Red Datasets
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
Red Datasets
|
3
|
+
[![Build Status](https://travis-ci.org/red-data-tools/red-datasets.svg?branch=master)](https://travis-ci.org/red-data-tools/red-datasets)
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/red-datasets.svg)](https://badge.fury.io/rb/red-datasets)
|
6
5
|
|
7
6
|
## Description
|
8
7
|
|
@@ -16,6 +15,20 @@ You can use datasets easily because you can access each dataset with multiple wa
|
|
16
15
|
% gem install red-datasets
|
17
16
|
```
|
18
17
|
|
18
|
+
## Available datasets
|
19
|
+
|
20
|
+
TODO: Document them in source code to list in document: https://www.rubydoc.info/gems/red-datasets
|
21
|
+
|
22
|
+
* Adult Dataset
|
23
|
+
* CIFAR-10 Dataset
|
24
|
+
* CIFAR-100 Dataset
|
25
|
+
* Fashion-MNIST
|
26
|
+
* Iris Dataset
|
27
|
+
* MNIST database
|
28
|
+
* The Penn Treebank Project
|
29
|
+
* Wikipedia
|
30
|
+
* Wine Dataset
|
31
|
+
|
19
32
|
## Usage
|
20
33
|
|
21
34
|
Here is an example to access [Iris Data Set](https://archive.ics.uci.edu/ml/datasets/iris) by `#each` or `Table#to_h` or `Table#fetch_values`.
|
@@ -30,7 +43,7 @@ iris.each do |record|
|
|
30
43
|
record.sepal_width,
|
31
44
|
record.petal_length,
|
32
45
|
record.petal_width,
|
33
|
-
record.
|
46
|
+
record.label,
|
34
47
|
]
|
35
48
|
end
|
36
49
|
# => [5.1, 3.5, 1.4, 0.2, "Iris-setosa"]
|
@@ -48,7 +61,7 @@ p iris_hash[:petal_length]
|
|
48
61
|
# => [1.4, 1.4, .. , 4.7, ..
|
49
62
|
p iris_hash[:petal_width]
|
50
63
|
# => [0.2, 0.2, .. , 1.4, ..
|
51
|
-
p iris_hash[:
|
64
|
+
p iris_hash[:label]
|
52
65
|
# => ["Iris-setosa", "Iris-setosa", .. , "Iris-versicolor", ..
|
53
66
|
|
54
67
|
|
@@ -60,7 +73,7 @@ p iris_table.fetch_values(:sepal_length, :sepal_width, :petal_length, :petal_wid
|
|
60
73
|
[7.0, 3.2, 4.7, 1.4],
|
61
74
|
:
|
62
75
|
|
63
|
-
p iris_table[:
|
76
|
+
p iris_table[:label]
|
64
77
|
# => ["Iris-setosa", "Iris-setosa", .. , "Iris-versicolor", ..
|
65
78
|
```
|
66
79
|
|
@@ -118,6 +131,9 @@ mnist.each do |record|
|
|
118
131
|
end
|
119
132
|
```
|
120
133
|
|
134
|
+
## NArray compatibility
|
135
|
+
|
136
|
+
* [red-datasets-numo-narray](https://github.com/red-data-tools/red-datasets-numo-narray)
|
121
137
|
|
122
138
|
## License
|
123
139
|
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,129 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.1.1 - 2021-04-11
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for Ruby 3.0.
|
8
|
+
|
9
|
+
* `Datasets::Communities`: Added.
|
10
|
+
[GitHub#64][Patch by Yasuo Honda]
|
11
|
+
|
12
|
+
* `Datasets::EStatJapan`: Added.
|
13
|
+
[GitHub#90][Patch by Kunihiko Miyoshi]
|
14
|
+
|
15
|
+
* `Datasets::Penguins`: Added.
|
16
|
+
[GitHub#100][Patch by Kenta Murata]
|
17
|
+
|
18
|
+
* `Datasets::CLDRPlurals`: Added.
|
19
|
+
|
20
|
+
### Thanks
|
21
|
+
|
22
|
+
* Yasuo Honda
|
23
|
+
|
24
|
+
* Kunihiko Miyoshi
|
25
|
+
|
26
|
+
* Kenta Murata
|
27
|
+
|
28
|
+
## 0.1.0 - 2020-02-04
|
29
|
+
|
30
|
+
### Improvements
|
31
|
+
|
32
|
+
* Added support for Ruby 2.7.
|
33
|
+
[GitHub#82][GitHub#83][Patch by Yasuo Honda]
|
34
|
+
|
35
|
+
* `Datasets::Hepatitis`: Added.
|
36
|
+
[GitHub#70][Patch by KazuhiroYoshimoto]
|
37
|
+
|
38
|
+
* `Datasets::Downloader`: Added support for query.
|
39
|
+
|
40
|
+
### Thanks
|
41
|
+
|
42
|
+
* Yasuo Honda
|
43
|
+
|
44
|
+
* KazuhiroYoshimoto
|
45
|
+
|
46
|
+
## 0.0.9 - 2019-09-09
|
47
|
+
|
48
|
+
### Improvements
|
49
|
+
|
50
|
+
* `Datasets::LIBSVMDatasetList`: Improved performance.
|
51
|
+
|
52
|
+
* `Datasets::Mushroom`: Added.
|
53
|
+
[GitHub#33][Patch by Yasuo Honda]
|
54
|
+
|
55
|
+
* `Datasets::Table#n_columns`: Added.
|
56
|
+
|
57
|
+
* `Datasets::Table#n_rows`: Added.
|
58
|
+
|
59
|
+
* `Datasets::Table#[]`: Added support for index access.
|
60
|
+
|
61
|
+
* `Datasets::Table#column_names`: Added.
|
62
|
+
|
63
|
+
* `Datasets::Table#size`: Added.
|
64
|
+
|
65
|
+
* `Datasets::Table#length`: Added.
|
66
|
+
|
67
|
+
* `Datasets::Table#each_column`: Added.
|
68
|
+
|
69
|
+
* `Datasets::Table#each_record`: Added.
|
70
|
+
|
71
|
+
* `Datasets::Table#find_record`: Added.
|
72
|
+
|
73
|
+
### Thanks
|
74
|
+
|
75
|
+
* Yasuo Honda
|
76
|
+
|
77
|
+
### Improvements
|
78
|
+
|
79
|
+
## 0.0.8 - 2019-03-24
|
80
|
+
|
81
|
+
### Improvements
|
82
|
+
|
83
|
+
* Improved README.
|
84
|
+
[GitHub#40][Patch by kojix2]
|
85
|
+
|
86
|
+
* `Datasets::PostalCodeJapan`: Added.
|
87
|
+
|
88
|
+
* `Datasets::LIBSVMDatasetList`: Added.
|
89
|
+
|
90
|
+
* `Datasets::LIBSVM`: Added.
|
91
|
+
|
92
|
+
### Thanks
|
93
|
+
|
94
|
+
* kojix2
|
95
|
+
|
96
|
+
## 0.0.7 - 2018-11-21
|
97
|
+
|
98
|
+
### Improvements
|
99
|
+
|
100
|
+
* `Datasets::Table#dictionary_encode`: Added.
|
101
|
+
[GitHub#22]
|
102
|
+
|
103
|
+
* `Datasets::Table#label_encode`: Added.
|
104
|
+
|
105
|
+
* `Datasets::Dictionary`: Added.
|
106
|
+
|
107
|
+
* `Datasets::Wine`: Added.
|
108
|
+
[GitHub#26][Patch by Ryuta Suzuki]
|
109
|
+
|
110
|
+
* `Datasets::FashionMNIST`: Added.
|
111
|
+
[GitHub#27][Patch by chimame]
|
112
|
+
|
113
|
+
* `Datasets::Iris::Record#label`: Renamed from `#class`. This is an
|
114
|
+
incompatible change.
|
115
|
+
|
116
|
+
* `Datasets::Adult`: Added.
|
117
|
+
[GitHub#30][Patch by Yasuo Honda]
|
118
|
+
|
119
|
+
### Thanks
|
120
|
+
|
121
|
+
* Ryuta Suzuki
|
122
|
+
|
123
|
+
* chimame
|
124
|
+
|
125
|
+
* Yasuo Honda
|
126
|
+
|
3
127
|
## 0.0.6 - 2018-07-25
|
4
128
|
|
5
129
|
### Improvements
|
data/lib/datasets.rb
CHANGED
@@ -1,7 +1,19 @@
|
|
1
|
-
|
1
|
+
require_relative "datasets/version"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
require_relative "datasets/adult"
|
4
|
+
require_relative "datasets/cifar"
|
5
|
+
require_relative "datasets/cldr-plurals"
|
6
|
+
require_relative "datasets/communities"
|
7
|
+
require_relative "datasets/e-stat-japan"
|
8
|
+
require_relative "datasets/fashion-mnist"
|
9
|
+
require_relative "datasets/hepatitis"
|
10
|
+
require_relative "datasets/iris"
|
11
|
+
require_relative "datasets/libsvm"
|
12
|
+
require_relative "datasets/libsvm-dataset-list"
|
13
|
+
require_relative "datasets/mnist"
|
14
|
+
require_relative "datasets/mushroom"
|
15
|
+
require_relative "datasets/penguins"
|
16
|
+
require_relative "datasets/penn-treebank"
|
17
|
+
require_relative "datasets/postal-code-japan"
|
18
|
+
require_relative "datasets/wikipedia"
|
19
|
+
require_relative "datasets/wine"
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require "csv"
|
2
|
+
|
3
|
+
require_relative "dataset"
|
4
|
+
|
5
|
+
module Datasets
|
6
|
+
class Adult < Dataset
|
7
|
+
Record = Struct.new(
|
8
|
+
:age,
|
9
|
+
:work_class,
|
10
|
+
:final_weight,
|
11
|
+
:education,
|
12
|
+
:n_education_years,
|
13
|
+
:marital_status,
|
14
|
+
:occupation,
|
15
|
+
:relationship,
|
16
|
+
:race,
|
17
|
+
:sex,
|
18
|
+
:capital_gain,
|
19
|
+
:capital_loss,
|
20
|
+
:hours_per_week,
|
21
|
+
:native_country,
|
22
|
+
:label
|
23
|
+
)
|
24
|
+
|
25
|
+
def initialize(type: :train)
|
26
|
+
unless [:train, :test].include?(type)
|
27
|
+
raise ArgumentError, 'Please set type :train or :test'
|
28
|
+
end
|
29
|
+
|
30
|
+
super()
|
31
|
+
@type = type
|
32
|
+
@metadata.id = "adult-#{@type}"
|
33
|
+
@metadata.name = "Adult: #{@type}"
|
34
|
+
@metadata.url = "http://archive.ics.uci.edu/ml/datasets/adult"
|
35
|
+
@metadata.description = lambda do
|
36
|
+
read_names
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def each
|
41
|
+
return to_enum(__method__) unless block_given?
|
42
|
+
|
43
|
+
open_data do |csv|
|
44
|
+
csv.each do |row|
|
45
|
+
next if row[0].nil?
|
46
|
+
record = Record.new(*row)
|
47
|
+
yield(record)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
def open_data
|
54
|
+
case @type
|
55
|
+
when :train
|
56
|
+
ext = "data"
|
57
|
+
when :test
|
58
|
+
ext = "test"
|
59
|
+
end
|
60
|
+
data_path = cache_dir_path + "adult-#{ext}.csv"
|
61
|
+
unless data_path.exist?
|
62
|
+
data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.#{ext}"
|
63
|
+
download(data_path, data_url)
|
64
|
+
end
|
65
|
+
|
66
|
+
options = {
|
67
|
+
converters: [:numeric, lambda {|f| f.strip}],
|
68
|
+
skip_lines: /\A\|/,
|
69
|
+
}
|
70
|
+
CSV.open(data_path, **options) do |csv|
|
71
|
+
yield(csv)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def read_names
|
76
|
+
names_path = cache_dir_path + "adult.names"
|
77
|
+
unless names_path.exist?
|
78
|
+
names_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names"
|
79
|
+
download(names_path, names_url)
|
80
|
+
end
|
81
|
+
names_path.read
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,385 @@
|
|
1
|
+
require "rexml/streamlistener"
|
2
|
+
require "rexml/parsers/baseparser"
|
3
|
+
require "rexml/parsers/streamparser"
|
4
|
+
require "strscan"
|
5
|
+
|
6
|
+
require_relative "dataset"
|
7
|
+
|
8
|
+
module Datasets
|
9
|
+
class CLDRPlurals < Dataset
|
10
|
+
Locale = Struct.new(:name,
|
11
|
+
:rules)
|
12
|
+
|
13
|
+
Rule = Struct.new(:count,
|
14
|
+
:condition,
|
15
|
+
:integer_samples,
|
16
|
+
:decimal_samples)
|
17
|
+
|
18
|
+
def initialize
|
19
|
+
super()
|
20
|
+
@metadata.id = "cldr-plurals"
|
21
|
+
@metadata.name = "CLDR language plural rules"
|
22
|
+
@metadata.url = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/plurals.xml"
|
23
|
+
@metadata.licenses = ["Unicode-DFS-2016"]
|
24
|
+
@metadata.description = <<~DESCRIPTION
|
25
|
+
Language plural rules in Unicode Common Locale Data Repository.
|
26
|
+
See also: https://unicode-org.github.io/cldr-staging/charts/latest/supplemental/language_plural_rules.html
|
27
|
+
DESCRIPTION
|
28
|
+
end
|
29
|
+
|
30
|
+
def each(&block)
|
31
|
+
return to_enum(__method__) unless block_given?
|
32
|
+
|
33
|
+
open_data do |input|
|
34
|
+
catch do |abort_tag|
|
35
|
+
listener = Listener.new(abort_tag, &block)
|
36
|
+
parser = REXML::Parsers::StreamParser.new(input, listener)
|
37
|
+
parser.parse
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
def open_data
|
44
|
+
data_path = cache_dir_path + "plurals.xml"
|
45
|
+
unless data_path.exist?
|
46
|
+
download(data_path, @metadata.url)
|
47
|
+
end
|
48
|
+
::File.open(data_path) do |input|
|
49
|
+
yield(input)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# Spec: https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
|
54
|
+
class Listener
|
55
|
+
include REXML::StreamListener
|
56
|
+
|
57
|
+
def initialize(abort_tag, &block)
|
58
|
+
@abort_tag = abort_tag
|
59
|
+
@block = block
|
60
|
+
@tag_name_stack = []
|
61
|
+
end
|
62
|
+
|
63
|
+
def tag_start(name, attributes)
|
64
|
+
@tag_name_stack.push(name)
|
65
|
+
case name
|
66
|
+
when "pluralRules"
|
67
|
+
@locales = attributes["locales"].split
|
68
|
+
@rules = []
|
69
|
+
when "pluralRule"
|
70
|
+
@rule = Rule.new(attributes["count"])
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def tag_end(name)
|
75
|
+
case name
|
76
|
+
when "pluralRules"
|
77
|
+
@locales.each do |locale_name|
|
78
|
+
@block.call(Locale.new(locale_name, @rules))
|
79
|
+
end
|
80
|
+
when "pluralRule"
|
81
|
+
@rules << @rule
|
82
|
+
end
|
83
|
+
@tag_name_stack.pop
|
84
|
+
end
|
85
|
+
|
86
|
+
def text(data)
|
87
|
+
case @tag_name_stack.last
|
88
|
+
when "pluralRule"
|
89
|
+
parse_plural_rule(data)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
private
|
94
|
+
def parse_plural_rule(data)
|
95
|
+
parser = RuleParser.new(@rule, data)
|
96
|
+
parser.parse
|
97
|
+
end
|
98
|
+
end
|
99
|
+
private_constant :Listener
|
100
|
+
|
101
|
+
# Syntax: http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax
|
102
|
+
class RuleParser
|
103
|
+
def initialize(rule, data)
|
104
|
+
@rule = rule
|
105
|
+
@data = data
|
106
|
+
@scanner = StringScanner.new(@data)
|
107
|
+
end
|
108
|
+
|
109
|
+
def parse
|
110
|
+
@rule.condition = parse_condition
|
111
|
+
skip_whitespaces
|
112
|
+
if @scanner.scan(/@integer/)
|
113
|
+
@rule.integer_samples = parse_sample_list
|
114
|
+
end
|
115
|
+
skip_whitespaces
|
116
|
+
if @scanner.scan(/@decimal/)
|
117
|
+
@rule.decimal_samples = parse_sample_list
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
private
|
122
|
+
def skip_whitespaces
|
123
|
+
@scanner.skip(/\p{Pattern_White_Space}+/)
|
124
|
+
end
|
125
|
+
|
126
|
+
def parse_condition
|
127
|
+
and_condition = parse_and_condition
|
128
|
+
return nil if and_condition.nil?
|
129
|
+
and_conditions = [and_condition]
|
130
|
+
while parse_or
|
131
|
+
and_conditions << parse_and_condition
|
132
|
+
end
|
133
|
+
if and_conditions.size == 1
|
134
|
+
and_condition
|
135
|
+
else
|
136
|
+
[:or, *and_conditions]
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
def parse_or
|
141
|
+
skip_whitespaces
|
142
|
+
@scanner.scan(/or/)
|
143
|
+
end
|
144
|
+
|
145
|
+
def parse_and_condition
|
146
|
+
skip_whitespaces
|
147
|
+
relation = parse_relation
|
148
|
+
return nil if relation.nil?
|
149
|
+
relations = [relation]
|
150
|
+
while parse_and
|
151
|
+
relations << parse_relation
|
152
|
+
end
|
153
|
+
if relations.size == 1
|
154
|
+
relation
|
155
|
+
else
|
156
|
+
[:and, *relations]
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
def parse_and
|
161
|
+
skip_whitespaces
|
162
|
+
@scanner.scan(/and/)
|
163
|
+
end
|
164
|
+
|
165
|
+
def parse_relation
|
166
|
+
parse_is_relation or
|
167
|
+
parse_in_relation or
|
168
|
+
parse_within_relation
|
169
|
+
end
|
170
|
+
|
171
|
+
def parse_is_relation
|
172
|
+
position = @scanner.pos
|
173
|
+
skip_whitespaces
|
174
|
+
expr = parse_expr
|
175
|
+
unless parse_is
|
176
|
+
@scanner.pos = position
|
177
|
+
return nil
|
178
|
+
end
|
179
|
+
if parse_not
|
180
|
+
operator = :is_not
|
181
|
+
else
|
182
|
+
operator = :is
|
183
|
+
end
|
184
|
+
value = parse_value
|
185
|
+
if value.nil?
|
186
|
+
raise Error.new("no value for #{operator}: #{@scanner.inspect}")
|
187
|
+
end
|
188
|
+
[operator, expr, value]
|
189
|
+
end
|
190
|
+
|
191
|
+
def parse_is
|
192
|
+
skip_whitespaces
|
193
|
+
@scanner.scan(/is/)
|
194
|
+
end
|
195
|
+
|
196
|
+
def parse_not
|
197
|
+
skip_whitespaces
|
198
|
+
@scanner.scan(/not/)
|
199
|
+
end
|
200
|
+
|
201
|
+
# Parses an "in"-style relation starting at the current scanner position:
#
#   expr ("not"? "in" | "=" | "!=") range_list
#
# Returns [operator, expr, range_list] where operator is one of :in,
# :not_in, :equal or :not_equal.
#
# Returns nil, with the scanner rewound to where it started, when the
# input is not such a relation, so that parse_relation can go on to try
# parse_within_relation.
def parse_in_relation
  position = @scanner.pos
  skip_whitespaces
  expr = parse_expr
  if parse_not
    if parse_in
      operator = :not_in
    else
      # "not" that is not followed by "in" (e.g. "n not within 1..2").
      # Rewind with StringScanner#pos= so the next alternative starts
      # from the beginning of the relation.
      @scanner.pos = position
      return nil
    end
  elsif parse_in
    operator = :in
  elsif parse_equal
    operator = :equal
  elsif parse_not_equal
    operator = :not_equal
  else
    @scanner.pos = position
    return nil
  end
  range_list = parse_range_list
  [operator, expr, range_list]
end
|
225
|
+
|
226
|
+
def parse_in
|
227
|
+
skip_whitespaces
|
228
|
+
@scanner.scan(/in/)
|
229
|
+
end
|
230
|
+
|
231
|
+
def parse_equal
|
232
|
+
skip_whitespaces
|
233
|
+
@scanner.scan(/=/)
|
234
|
+
end
|
235
|
+
|
236
|
+
def parse_not_equal
|
237
|
+
skip_whitespaces
|
238
|
+
@scanner.scan(/!=/)
|
239
|
+
end
|
240
|
+
|
241
|
+
def parse_within_relation
|
242
|
+
position = @scanner.pos
|
243
|
+
skip_whitespaces
|
244
|
+
expr = parse_expr
|
245
|
+
have_not = parse_not
|
246
|
+
unless parse_within
|
247
|
+
@scanner.pos = position
|
248
|
+
return nil
|
249
|
+
end
|
250
|
+
if have_not
|
251
|
+
operator = :not_within
|
252
|
+
else
|
253
|
+
operator = :within
|
254
|
+
end
|
255
|
+
range_list = parse_range_list
|
256
|
+
[operator, expr, range_list]
|
257
|
+
end
|
258
|
+
|
259
|
+
def parse_within
|
260
|
+
skip_whitespaces
|
261
|
+
@scanner.scan(/within/)
|
262
|
+
end
|
263
|
+
|
264
|
+
def parse_expr
|
265
|
+
operand = parse_operand
|
266
|
+
operator = parse_expr_operator
|
267
|
+
if operator
|
268
|
+
value = parse_value
|
269
|
+
if value.nil?
|
270
|
+
raise Error.new("no value for #{operator}: #{@scanner.inspect}")
|
271
|
+
end
|
272
|
+
[operator, operand, value]
|
273
|
+
else
|
274
|
+
operand
|
275
|
+
end
|
276
|
+
end
|
277
|
+
|
278
|
+
def parse_operand
|
279
|
+
skip_whitespaces
|
280
|
+
@scanner.scan(/[niftvwce]/)
|
281
|
+
end
|
282
|
+
|
283
|
+
def parse_expr_operator
|
284
|
+
skip_whitespaces
|
285
|
+
if @scanner.scan(/(?:mod|%)/)
|
286
|
+
:mod
|
287
|
+
else
|
288
|
+
nil
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
def parse_range_list
|
293
|
+
ranges = [parse_range || parse_value]
|
294
|
+
loop do
|
295
|
+
skip_whitespaces
|
296
|
+
break unless @scanner.scan(/,/)
|
297
|
+
ranges << (parse_range || parse_value)
|
298
|
+
end
|
299
|
+
ranges
|
300
|
+
end
|
301
|
+
|
302
|
+
def parse_range
|
303
|
+
position = @scanner.pos
|
304
|
+
range_start = parse_value
|
305
|
+
skip_whitespaces
|
306
|
+
unless @scanner.scan(/\.\./)
|
307
|
+
@scanner.pos = position
|
308
|
+
return nil
|
309
|
+
end
|
310
|
+
range_end = parse_value
|
311
|
+
range_start..range_end
|
312
|
+
end
|
313
|
+
|
314
|
+
def parse_value
|
315
|
+
skip_whitespaces
|
316
|
+
value = @scanner.scan(/\d+/)
|
317
|
+
return nil if value.nil?
|
318
|
+
Integer(value, 10)
|
319
|
+
end
|
320
|
+
|
321
|
+
def parse_sample_list
|
322
|
+
samples = [parse_sample_range]
|
323
|
+
loop do
|
324
|
+
position = @scanner.pos
|
325
|
+
skip_whitespaces
|
326
|
+
break unless @scanner.scan(/,/)
|
327
|
+
sample_range = parse_sample_range
|
328
|
+
unless sample_range
|
329
|
+
@scanner.pos = position
|
330
|
+
break
|
331
|
+
end
|
332
|
+
samples << sample_range
|
333
|
+
end
|
334
|
+
skip_whitespaces
|
335
|
+
if @scanner.scan(/,/)
|
336
|
+
skip_whitespaces
|
337
|
+
# U+2026 HORIZONTAL ELLIPSIS
|
338
|
+
unless @scanner.scan(/\u2026|\.\.\./)
|
339
|
+
raise "no ellipsis: #{@scanner.inspect}"
|
340
|
+
end
|
341
|
+
samples << :elipsis
|
342
|
+
end
|
343
|
+
samples
|
344
|
+
end
|
345
|
+
|
346
|
+
def parse_sample_range
|
347
|
+
value = parse_sample_value
|
348
|
+
return nil if value.nil?
|
349
|
+
skip_whitespaces
|
350
|
+
if @scanner.scan(/~/)
|
351
|
+
range_end = parse_sample_value
|
352
|
+
value..range_end
|
353
|
+
else
|
354
|
+
value
|
355
|
+
end
|
356
|
+
end
|
357
|
+
|
358
|
+
# Parses one sample value at the current scanner position:
#
#   value ('.' digit+)? ([ce] digit+)?
#
# Returns nil when no leading integer is present. Otherwise returns the
# number read: an Integer, or a Float when a fractional part is given.
# A trailing "c"/"e" suffix is a power-of-ten exponent, so "1c3" is 1000
# and "1.5c3" is 1500.0.
#
# Raises RuntimeError when "." is not followed by digits.
def parse_sample_value
  value = parse_value
  return nil if value.nil?
  if @scanner.scan(/\./)
    skip_whitespaces
    decimal = @scanner.scan(/[0-9]+/)
    if decimal.nil?
      raise "no decimal: #{@scanner.inspect}"
    end
    value += Float("0.#{decimal}")
    skip_whitespaces
  end
  if @scanner.scan(/[ce]/)
    # Workaround for a spec bug. "e1" should be accepted.
    #
    # Spec:
    #   sampleValue = value ('.' digit+)? ([ce] digitPos digit+)?
    #   digit       = [0-9]
    #   digitPos    = [1-9]
    e = @scanner.scan(/[1-9][0-9]*/)
    # The suffix is an exponent: scale by 10 raised to e, not by 10 * e.
    value *= 10 ** Integer(e, 10)
  end
  value
end
|
382
|
+
end
|
383
|
+
private_constant :RuleParser
|
384
|
+
end
|
385
|
+
end
|