twitter_cldr 3.0.4 → 3.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/History.txt +5 -0
- data/lib/twitter_cldr/formatters/numbers/rbnf.rb +0 -1
- data/lib/twitter_cldr/resources/unicode_data_importer.rb +0 -1
- data/lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb +27 -1
- data/lib/twitter_cldr/tokenizers/token.rb +7 -2
- data/lib/twitter_cldr/version.rb +1 -1
- data/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb +9 -0
- data/spec/tokenizers/numbers/number_tokenizer_spec.rb +16 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZDU2MmRkOWM4ZGYzOWVhM2E1M2ZmNWE3ZGMwYWNmOGVjYTY2ZGQ1Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZDZkNWRmOGMyMTliZmI2ODMzZGYxMzIyNTNjMGRiNTFjOGZlYzYwMg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZDY2OTYwMjIxZDMxZTk0OTJhMDJmNjI2OGEzNjI2YTIwYTc2MDc4YWYxMzkz
|
10
|
+
NDI3YzBhMTE2YWZjODJkYTZlNWJiNDU2ZWEyMGYyMmEzNjY5ODM0YmM4YTll
|
11
|
+
NjQyNzIzMzJkN2EyODlkZThlNjFmNzA2Mzg2MGMzZWJlZDNmZWI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
OGQzNDRjYjQ0NjhjM2Q5MGJlZTQ0NmI3YWQ2YWZiMWVmY2IzNjgyN2Y2ZDli
|
14
|
+
ZjRjYTBlY2M5ZTdlNDc3YWYyZWI4NTdjZTE2NjA1ODA3N2QwNjMyZjQ1Nzkw
|
15
|
+
NjViNzc2OTkzNmI4OGI2YTY0MGI0YzIwMmQ3Yzk1YmY3OGVkNGQ=
|
data/History.txt
CHANGED
@@ -100,7 +100,6 @@ module TwitterCldr
|
|
100
100
|
def rule_set_from_resource(rule_set_data)
|
101
101
|
RuleSet.new(
|
102
102
|
rule_set_data[:rules].map do |rule|
|
103
|
-
binding.pry unless rule[:rule].is_a?(String)
|
104
103
|
Rule.new(rule[:value], rule[:rule], rule[:radix])
|
105
104
|
end,
|
106
105
|
rule_set_data[:type],
|
@@ -7,6 +7,22 @@ module TwitterCldr
|
|
7
7
|
module Tokenizers
|
8
8
|
class NumberTokenizer
|
9
9
|
|
10
|
+
SPECIAL_SYMBOLS_MAP = {
|
11
|
+
'.' => '{DOT}',
|
12
|
+
',' => '{COMMA}',
|
13
|
+
'0' => '{ZERO}',
|
14
|
+
'#' => '{POUND}',
|
15
|
+
'¤' => '{CURRENCY}',
|
16
|
+
'%' => '{PERCENT}',
|
17
|
+
'E' => '{SCIENTIFIC}'
|
18
|
+
}
|
19
|
+
|
20
|
+
SPECIAL_SYMBOLS_REGEX = /'(?:#{SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')})'/
|
21
|
+
|
22
|
+
INVERSE_SPECIAL_SYMBOLS_MAP = SPECIAL_SYMBOLS_MAP.invert
|
23
|
+
|
24
|
+
INVERSE_SPECIAL_SYMBOLS_REGEX = /#{INVERSE_SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')}/
|
25
|
+
|
10
26
|
attr_reader :data_reader
|
11
27
|
|
12
28
|
def initialize(data_reader)
|
@@ -14,7 +30,17 @@ module TwitterCldr
|
|
14
30
|
end
|
15
31
|
|
16
32
|
def tokenize(pattern)
|
17
|
-
|
33
|
+
escaped_pattern = pattern.gsub(SPECIAL_SYMBOLS_REGEX) do |match|
|
34
|
+
SPECIAL_SYMBOLS_MAP[match[1..-2]]
|
35
|
+
end
|
36
|
+
|
37
|
+
tokens = PatternTokenizer.new(data_reader, tokenizer).tokenize(escaped_pattern)
|
38
|
+
|
39
|
+
tokens.each do |token|
|
40
|
+
token.value = token.value.gsub(INVERSE_SPECIAL_SYMBOLS_REGEX) do |match|
|
41
|
+
INVERSE_SPECIAL_SYMBOLS_MAP[match]
|
42
|
+
end
|
43
|
+
end
|
18
44
|
|
19
45
|
if tokens.first.value == ""
|
20
46
|
tokens[1..-1]
|
@@ -14,12 +14,17 @@ module TwitterCldr
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
+
def to_hash
|
18
|
+
{ :value => @value, :type => @type }
|
19
|
+
end
|
20
|
+
|
17
21
|
def to_s
|
18
22
|
@value
|
19
23
|
end
|
20
24
|
|
21
|
-
|
22
|
-
|
25
|
+
# overriding `to_s` also overrides `inspect`, so we have to redefine it manually
|
26
|
+
def inspect
|
27
|
+
"<#{self.class}: #{instance_variables.map {|v| "#{v}=#{instance_variable_get(v).inspect}" }.join(", ")}>"
|
23
28
|
end
|
24
29
|
end
|
25
30
|
end
|
data/lib/twitter_cldr/version.rb
CHANGED
@@ -67,4 +67,13 @@ describe ShortDecimalFormatter do
|
|
67
67
|
expect(format_number(number)).to match_normalized("9300万")
|
68
68
|
end
|
69
69
|
end
|
70
|
+
|
71
|
+
context "with Russian locale" do
|
72
|
+
let(:locale) { :ru }
|
73
|
+
|
74
|
+
it "formats a number with a literal period" do
|
75
|
+
number = 1_000
|
76
|
+
expect(format_number(number)).to match_normalized("1 тыс.")
|
77
|
+
end
|
78
|
+
end
|
70
79
|
end
|
@@ -31,6 +31,22 @@ describe NumberTokenizer do
|
|
31
31
|
check_token_list(got, expected)
|
32
32
|
end
|
33
33
|
|
34
|
+
it "gets tokens for an abbreviated number pattern with a literal period (e.g., for Russian)" do
|
35
|
+
data_reader = TwitterCldr::DataReaders::NumberDataReader.new(:ru, :type => :short_decimal)
|
36
|
+
pattern = data_reader.pattern(1_000)
|
37
|
+
|
38
|
+
expect(pattern).to include("тыс'.'") # ensure that we test with the data we expect
|
39
|
+
|
40
|
+
got = data_reader.tokenizer.tokenize(pattern)
|
41
|
+
|
42
|
+
expected = [
|
43
|
+
{ :value => "", :type => :plaintext },
|
44
|
+
{ :value => "0", :type => :pattern },
|
45
|
+
{ :value => " тыс.", :type => :plaintext }
|
46
|
+
]
|
47
|
+
check_token_list(got, expected)
|
48
|
+
end
|
49
|
+
|
34
50
|
it "correctly parses suffixes (i.e. Russian currency)" do
|
35
51
|
data_reader = TwitterCldr::DataReaders::NumberDataReader.new(:ru, :type => :currency)
|
36
52
|
got = data_reader.tokenizer.tokenize(data_reader.pattern(number))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_cldr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.4
|
4
|
+
version: 3.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cameron Dutro
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|