cldr-plurals 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +18 -0
- data/History.txt +3 -0
- data/README.md +139 -0
- data/Rakefile +84 -0
- data/cldr-plurals.gemspec +18 -0
- data/lib/cldr-plurals.rb +7 -0
- data/lib/cldr-plurals/compiler.rb +43 -0
- data/lib/cldr-plurals/compiler/emitter.rb +30 -0
- data/lib/cldr-plurals/compiler/nodes.rb +86 -0
- data/lib/cldr-plurals/compiler/parser.rb +153 -0
- data/lib/cldr-plurals/compiler/tokenizer.rb +56 -0
- data/lib/cldr-plurals/javascript_emitter.rb +106 -0
- data/lib/cldr-plurals/ruby_emitter.rb +108 -0
- data/lib/cldr-plurals/version.rb +5 -0
- data/spec/javascript_sample_spec.rb +48 -0
- data/spec/ruby_sample_spec.rb +43 -0
- data/spec/samples.yml +3371 -0
- data/spec/spec_helper.rb +49 -0
- metadata +63 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8f030dac2488f8587a8c08bb417020586fbf8fb1
|
4
|
+
data.tar.gz: 6a8e9b73402a09d1d784639a369b6f22730d30e9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ac49b1952414dd79e8d5cef92aec0916e8815c29c2a3e354eaf807a26a022af32a09f35cdd12d816c131c3d0d488a07b22caedb9f2f74b452ebae058ac661247
|
7
|
+
data.tar.gz: e97db6ef1351a1873aea7e0d1c893008e5f4131e1b84f922c6afa03d2563d8b065aab48846276b7f25d6378ae2c6e95836298efcd5691a8a9b30f981f57331fe
|
data/Gemfile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
source "https://rubygems.org"
|
2
|
+
|
3
|
+
gemspec
|
4
|
+
|
5
|
+
group :development, :test do
|
6
|
+
gem 'pry-nav'
|
7
|
+
gem 'rake'
|
8
|
+
gem 'nokogiri'
|
9
|
+
end
|
10
|
+
|
11
|
+
group :test do
|
12
|
+
gem 'rspec'
|
13
|
+
gem 'rr'
|
14
|
+
|
15
|
+
gem 'therubyracer', '~> 0.12.0'
|
16
|
+
gem 'cldr-plurals-runtime-rb', '~> 1.0.0'
|
17
|
+
gem 'cldr-plurals-runtime-js', '~> 1.0.0'
|
18
|
+
end
|
data/History.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
cldr-plurals
|
2
|
+
=================
|
3
|
+
|
4
|
+
[![Build Status](https://travis-ci.org/camertron/cldr-plurals.svg?branch=master)](http://travis-ci.org/camertron/cldr-plurals)
|
5
|
+
|
6
|
+
Tokenizes and parses CLDR plural rules and provides a mechanism for emitting them as source code.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
`gem install cldr-plurals`
|
11
|
+
|
12
|
+
## Usage
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
require 'cldr-plurals'
|
16
|
+
```
|
17
|
+
|
18
|
+
## Rules
|
19
|
+
|
20
|
+
The CLDR data set contains [plural information](http://unicode.org/cldr/trac/browser/tags/release-26-d04/common/supplemental/plurals.xml) for numerous languages in an expression-based [format](http://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules) defined by Unicode's TR35. This gem can tokenize, parse, and emit these rules as source code. Currently Ruby and Javascript are supported.
|
21
|
+
|
22
|
+
### Tokenizing
|
23
|
+
|
24
|
+
Generate a list of tokens using the `Tokenizer` class:
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
tokens = CldrPlurals::Compiler::Tokenizer.tokenize(
|
28
|
+
'v = 0 and i % 10 = 1 and i % 100 != 11'
|
29
|
+
)
|
30
|
+
```
|
31
|
+
|
32
|
+
### Parsing
|
33
|
+
|
34
|
+
Once you have a token list, the `Parser` class can turn it into an [abstract syntax tree](http://en.wikipedia.org/wiki/Abstract_syntax_tree) (i.e. a `CldrPlurals::Compiler::Rule`) for you:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
rule = CldrPlurals::Compiler::Parser.new(tokens).parse
|
38
|
+
```
|
39
|
+
|
40
|
+
### Emitting
|
41
|
+
|
42
|
+
Rules can be emitted as either Ruby or Javascript source code via the appropriate `Emitter` class:
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
ruby_code = CldrPlurals::RubyEmitter.emit_rule_standalone(rule)
|
46
|
+
```
|
47
|
+
|
48
|
+
For our example above, the emitted standalone ruby code looks like this:
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
lambda { |n, i, f, t, v, w| ((v == 0 && i % 10 == 1) && i % 100 != 11) }
|
52
|
+
```
|
53
|
+
|
54
|
+
## Rule Lists
|
55
|
+
|
56
|
+
The CLDR data set defines groups of rules that together determine the plural form for a given number. Create a `RuleList` object to group rules together. `RuleList`s take care of the tokenizing and parsing steps for you:
|
57
|
+
|
58
|
+
```ruby
|
59
|
+
rules = CldrPlurals::Compiler::RuleList.new(:ru).tap do |rule_list|
|
60
|
+
rule_list.add_rule(:one, 'v = 0 and i % 10 = 1 and i % 100 != 11')
|
61
|
+
rule_list.add_rule(:few, 'v = 0 and i % 10 = 2..4 and i % 100 != 12..14')
|
62
|
+
rule_list.add_rule(:many, 'v = 0 and i % 10 = 0 or v = 0 and i % 10 = 5..9 or v = 0 and i % 100 = 11..14')
|
63
|
+
end
|
64
|
+
```
|
65
|
+
|
66
|
+
Note that there's no need to add an explicit rule for the `other` plural form.
|
67
|
+
|
68
|
+
`RuleList`s can be emitted in their entirety via the `to_code` method, which accepts a target language as an argument:
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
rules.to_code(:ruby)
|
72
|
+
```
|
73
|
+
|
74
|
+
Which produces:
|
75
|
+
|
76
|
+
```ruby
|
77
|
+
lambda { |num, runtime| n = runtime.n(num); i = runtime.i(num); v = runtime.v(num); w = runtime.w(num); f = runtime.f(num); t = runtime.t(num); (((v == 0 && i % 10 == 1) && i % 100 != 11) ? :one : (((v == 0 && (2..4).include?(i % 10)) && !(12..14).include?(i % 100)) ? :few : ((((v == 0 && i % 10 == 0) || (v == 0 && (5..9).include?(i % 10))) || (v == 0 && (11..14).include?(i % 100))) ? :many : :other))) }
|
78
|
+
```
|
79
|
+
|
80
|
+
OR
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
rules.to_code(:javascript)
|
84
|
+
```
|
85
|
+
|
86
|
+
Which produces:
|
87
|
+
|
88
|
+
```javascript
|
89
|
+
(function(num, runtime) { var n = runtime.n(num); var i = runtime.i(num); var v = runtime.v(num); var w = runtime.w(num); var f = runtime.f(num); var t = runtime.t(num); return (((v == 0 && i % 10 == 1) && i % 100 != 11) ? 'one' : (((v == 0 && ((i % 10 >= 2) && (i % 10 <= 4))) && !((i % 100 >= 12) && (i % 100 <= 14))) ? 'few' : ((((v == 0 && i % 10 == 0) || (v == 0 && ((i % 10 >= 5) && (i % 10 <= 9)))) || (v == 0 && ((i % 100 >= 11) && (i % 100 <= 14)))) ? 'many' : 'other'))); })
|
90
|
+
```
|
91
|
+
|
92
|
+
## Executing Targets
|
93
|
+
|
94
|
+
You may have noticed that emitted target source code requires a runtime (the second argument). Runtimes provide methods to determine the 'parts' of a number, like the decimal portion or the number of decimals without trailing zeroes. Runtimes for [Ruby](https://github.com/camertron/cldr-plurals-runtime-rb) and [Javascript](https://github.com/camertron/cldr-plurals-runtime-js) exist as separate rubygems, so you'll need to include them in your project before being able to execute target code. Each runtime gem's README contains instructions on how to use it. Here are some quick examples:
|
95
|
+
|
96
|
+
Ruby:
|
97
|
+
|
98
|
+
```ruby
|
99
|
+
# construct RuleList...
|
100
|
+
|
101
|
+
require 'cldr-plurals/ruby_runtime'
|
102
|
+
|
103
|
+
ruby_code = rules.to_code(:ruby)
|
104
|
+
plural_proc = eval(ruby_code)
|
105
|
+
|
106
|
+
plural_proc.call('3', CldrPlurals::RubyRuntime) # => :few
|
107
|
+
```
|
108
|
+
|
109
|
+
Javascript:
|
110
|
+
|
111
|
+
```ruby
|
112
|
+
# construct RuleList...
|
113
|
+
|
114
|
+
require 'cldr-plurals/javascript_runtime'
|
115
|
+
|
116
|
+
plural_code = rules.to_code(:javascript)
|
117
|
+
|
118
|
+
File.open('./plurals-ru.js', 'w+') do |f|
|
119
|
+
f.write("
|
120
|
+
var runtime = #{CldrPlurals::JavascriptRuntime.source};
|
121
|
+
var rules = #{plural_code};
|
122
|
+
console.log(rules('3', runtime));
|
123
|
+
")
|
124
|
+
end
|
125
|
+
```
|
126
|
+
|
127
|
+
Then, running `node ./plurals-ru.js` should print `few`.
|
128
|
+
|
129
|
+
## Requirements
|
130
|
+
|
131
|
+
No external requirements.
|
132
|
+
|
133
|
+
## Running Tests
|
134
|
+
|
135
|
+
`bundle exec rake` should do the trick. Alternatively you can run `bundle exec rspec`, which does the same thing.
|
136
|
+
|
137
|
+
## Authors
|
138
|
+
|
139
|
+
* Cameron C. Dutro: http://github.com/camertron
|
data/Rakefile
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'rubygems' unless ENV['NO_RUBYGEMS']
|
4
|
+
|
5
|
+
require 'bundler'
|
6
|
+
require 'rspec/core/rake_task'
|
7
|
+
require 'rubygems/package_task'
|
8
|
+
|
9
|
+
require './lib/cldr-plurals'
|
10
|
+
|
11
|
+
Bundler::GemHelper.install_tasks
|
12
|
+
|
13
|
+
task :default => :spec
|
14
|
+
|
15
|
+
desc 'Run specs'
|
16
|
+
RSpec::Core::RakeTask.new do |t|
|
17
|
+
t.pattern = './spec/**/*_spec.rb'
|
18
|
+
end
|
19
|
+
|
20
|
+
# Expands a CLDR sample range such as "2~4" or "0.0~1.5" into the list of
# sample strings it stands for.
#
# Integer ranges step by 1. Decimal ranges step by one unit of the last
# fraction digit of the lower bound, and every result keeps that many fraction
# digits ("1.00~1.02" => ["1.00", "1.01", "1.02"]). The number of visible
# fraction digits matters to plural rules, so values are stepped as scaled
# integers instead of Floats, whose #to_s drops trailing zeros.
#
# @param sample_range [String] lower and upper bound separated by "~"
# @return [Array<String>] every sample in the range, both bounds included
def expand_sample_range(sample_range)
  first, last = sample_range.split('~')
  decimal_idx = first.index('.')

  return (first.to_i..last.to_i).map(&:to_s) unless decimal_idx

  places = first.length - decimal_idx - 1
  scale = 10**places

  # String#to_r parses decimal text exactly, so no float rounding error
  # accumulates while stepping through the range.
  start = (first.to_r * scale).round
  finish = (last.to_r * scale).floor

  (start..finish).map do |scaled|
    whole, fraction = scaled.divmod(scale)
    "#{whole}.#{fraction.to_s.rjust(places, '0')}"
  end
end
|
40
|
+
|
41
|
+
# Expands a comma-separated CLDR sample list into individual sample strings.
#
# Chunks containing "~" are treated as ranges and expanded via
# expand_sample_range; every other chunk is returned as-is. Whitespace around
# the commas is ignored, and empty chunks or a bare ellipsis marker (which
# CLDR uses to mean "and so on") are skipped rather than emitted as samples.
#
# @param sample_str [String] e.g. "1, 21, 31, 2~4"
# @return [Array<String>] the flattened list of samples
def expand_samples(sample_str)
  sample_str.split(/\s*,\s*/).flat_map do |sample_chunk|
    chunk = sample_chunk.strip

    if chunk.empty? || chunk == "\u2026"
      []
    elsif chunk.include?('~')
      expand_sample_range(chunk)
    else
      [chunk]
    end
  end
end
|
50
|
+
|
51
|
+
# Downloads the CLDR plural rules XML, extracts every rule together with its
# @integer/@decimal sample lists, and writes the result to spec/samples.yml.
# The resulting hash is keyed by the rule set's locales joined with "/".
desc 'Download the CLDR plural rules and regenerate spec/samples.yml'
task :update_samples do
  require 'open-uri'
  require 'nokogiri'
  require 'yaml'

  url = 'http://unicode.org/cldr/trac/browser/tags/release-26-d04/' +
        'common/supplemental/plurals.xml?format=txt'

  # URI#read is provided by open-uri. Kernel#open no longer fetches URLs on
  # Ruby 3.0+, where open(url) would look for a local file with that name.
  doc = Nokogiri::XML(URI.parse(url).read)

  samples = (doc / 'pluralRules').each_with_object({}) do |rules, ret|
    locales = rules.attributes['locales'].value.split(' ').join('/')

    ret[locales] = (rules / 'pluralRule').map do |rule|
      # The capture group keeps the @integer/@decimal markers in the result,
      # so after the rule text the chunks alternate marker, sample list.
      chunks = rule.text.split(/(@integer|@decimal)/)
      name = rule.attributes['count'].value.to_sym

      {
        text: chunks.first.strip,
        name: name,
        samples: chunks[1..-1].each_slice(2).map do |slice|
          {
            type: slice.first.strip,
            samples: expand_samples(slice.last.strip.chomp(', …'))
          }
        end
      }
    end
  end

  File.open(File.join(File.dirname(__FILE__), 'spec/samples.yml'), 'w+') do |f|
    f.write(YAML.dump(samples))
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
2
|
+
require 'cldr-plurals/version'
|
3
|
+
|
4
|
+
# Packaging metadata for the cldr-plurals gem. The version comes from
# CldrPlurals::VERSION, loaded by the require above.
Gem::Specification.new do |s|
  s.name = "cldr-plurals"
  s.version = ::CldrPlurals::VERSION
  s.authors = ["Cameron Dutro"]
  s.email = ["camertron@gmail.com"]
  s.homepage = "http://github.com/camertron"

  s.description = s.summary = 'Tokenizes and parses CLDR plural rules and provides a mechanism for emitting them as source code'

  # `has_rdoc=` is intentionally not set: RubyGems deprecated it as a no-op,
  # so calling it only risks a warning or error on newer releases.
  s.platform = Gem::Platform::RUBY

  s.require_path = 'lib'
  s.files = Dir["{lib,spec}/**/*", "Gemfile", "History.txt", "README.md", "Rakefile", "cldr-plurals.gemspec"]
end
|
data/lib/cldr-plurals.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CldrPlurals
  module Compiler
    autoload :Tokenizer, 'cldr-plurals/compiler/tokenizer'
    autoload :Parser, 'cldr-plurals/compiler/parser'
    autoload :Emitter, 'cldr-plurals/compiler/emitter'

    # An ordered collection of named plural rules for one locale. Rules are
    # added as rule strings, which are tokenized and parsed on the way in,
    # and the whole list can be emitted as source code for a target language.
    class RuleList
      attr_reader :locale, :rules

      # @param locale [Symbol, String] the locale these rules belong to
      def initialize(locale)
        @locale = locale
        @rules = []
      end

      # Tokenizes and parses rule_string, tags the resulting rule with name
      # and appends it to the list.
      #
      # @param name [Symbol] plural form the rule selects, e.g. :one or :few
      # @param rule_string [String] rule text, e.g. 'v = 0 and i % 10 = 1'
      # @return [nil]
      def add_rule(name, rule_string)
        rule = Parser.new(Tokenizer.tokenize(rule_string)).parse
        rule.name = name
        rules << rule
        nil
      end

      # Emits the whole rule list as source code.
      #
      # @param prog_lang [Symbol, String] target language, e.g. :ruby or
      #   :javascript; resolved to the constant CldrPlurals::<Lang>Emitter
      # @return whatever the emitter's emit_rules returns for this list
      # @raise [ArgumentError] if no emitter exists for prog_lang
      def to_code(prog_lang)
        emitter = find_emitter(prog_lang)
        emitter.emit_rules(self)
      end

      private

      # Maps a language name onto its emitter constant under CldrPlurals by
      # camel-casing it (:javascript => JavascriptEmitter, "foo_bar" =>
      # FooBarEmitter).
      def find_emitter(prog_lang)
        lang = prog_lang.to_s.gsub(/(^\w|[-_]\w)/) { $1[-1].upcase }
        const_name = "#{lang}Emitter"

        # const_defined? raises NameError (rather than returning false) when
        # the string is not a syntactically valid constant name, e.g. "C++".
        # Treat that the same as "no such emitter".
        emitter_defined =
          begin
            CldrPlurals.const_defined?(const_name)
          rescue NameError
            false
          end

        unless emitter_defined
          raise ArgumentError, "emitter 'CldrPlurals::#{const_name}' not found."
        end

        CldrPlurals.const_get(const_name)
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CldrPlurals
  module Compiler
    # Base class for code emitters. Dispatches each object to a class-level
    # handler named after the object's class: an AndCondition is passed to
    # emit_and_condition, an Operand to emit_operand, and so on. Subclasses
    # supply the handlers.
    class Emitter
      class << self

        protected

        # Emits a single object through its emit_<snake_cased_class> handler.
        # Returns the handler's result, or nil when this emitter has no
        # handler for the object's class.
        def emit(obj)
          short_name = obj.class.name.split('::').last
          snake_name = short_name.gsub(/([a-z\d])([A-Z])/, '\1_\2').downcase
          handler = "emit_#{snake_name}".to_sym

          # The second argument makes respond_to? see protected/private
          # handlers as well.
          return unless respond_to?(handler, true)

          send(handler, obj)
        end

        # Emits every given object in order and returns the results as an
        # array.
        def emit_all(*objs)
          objs.map do |node|
            emit(node)
          end
        end

      end
    end
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module CldrPlurals
  module Compiler
    # A complete plural rule: the root node of its syntax tree plus an
    # assignable name.
    class Rule
      attr_accessor :name
      attr_reader :root

      def initialize(root)
        @root = root
      end
    end

    # The left-hand side of a relation, e.g. "i % 1": an operand with an
    # operation and value applied to it.
    class Expression
      attr_reader :operand, :operation, :value

      def initialize(operand, operation, value)
        @operand, @operation, @value = operand, operation, value
      end
    end

    # A comparison of an expression against a value, e.g. "i % 1 = 0".
    class Relation
      attr_reader :expression, :operation, :value

      def initialize(expression, operation, value)
        @expression, @operation, @value = expression, operation, value
      end

      # Shortcut for the operand of the wrapped expression.
      def operand
        expression.operand
      end
    end

    # Base class for the binary and/or nodes. Note the constructor takes the
    # right side first, then the left.
    class Condition
      attr_reader :right, :left

      def initialize(right, left)
        @right, @left = right, left
      end
    end

    class AndCondition < Condition; end
    class OrCondition < Condition; end

    # A start/finish pair, e.g. "2..4".
    class Range
      attr_reader :start, :finish

      def initialize(start, finish)
        @start, @finish = start, finish
      end
    end

    # A collection of values.
    class ValueSet
      attr_reader :values

      def initialize(values)
        @values = values
      end
    end

    # Wraps an operator symbol.
    class Operator
      attr_reader :symbol

      def initialize(symbol)
        @symbol = symbol
      end
    end

    # Wraps an operand symbol.
    class Operand
      attr_reader :symbol

      def initialize(symbol)
        @symbol = symbol
      end
    end
  end
end
|