twitter_cldr 3.0.4 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/History.txt +5 -0
- data/lib/twitter_cldr/formatters/numbers/rbnf.rb +0 -1
- data/lib/twitter_cldr/resources/unicode_data_importer.rb +0 -1
- data/lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb +27 -1
- data/lib/twitter_cldr/tokenizers/token.rb +7 -2
- data/lib/twitter_cldr/version.rb +1 -1
- data/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb +9 -0
- data/spec/tokenizers/numbers/number_tokenizer_spec.rb +16 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZDU2MmRkOWM4ZGYzOWVhM2E1M2ZmNWE3ZGMwYWNmOGVjYTY2ZGQ1Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZDZkNWRmOGMyMTliZmI2ODMzZGYxMzIyNTNjMGRiNTFjOGZlYzYwMg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZDY2OTYwMjIxZDMxZTk0OTJhMDJmNjI2OGEzNjI2YTIwYTc2MDc4YWYxMzkz
|
10
|
+
NDI3YzBhMTE2YWZjODJkYTZlNWJiNDU2ZWEyMGYyMmEzNjY5ODM0YmM4YTll
|
11
|
+
NjQyNzIzMzJkN2EyODlkZThlNjFmNzA2Mzg2MGMzZWJlZDNmZWI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
OGQzNDRjYjQ0NjhjM2Q5MGJlZTQ0NmI3YWQ2YWZiMWVmY2IzNjgyN2Y2ZDli
|
14
|
+
ZjRjYTBlY2M5ZTdlNDc3YWYyZWI4NTdjZTE2NjA1ODA3N2QwNjMyZjQ1Nzkw
|
15
|
+
NjViNzc2OTkzNmI4OGI2YTY0MGI0YzIwMmQ3Yzk1YmY3OGVkNGQ=
|
data/History.txt
CHANGED
@@ -100,7 +100,6 @@ module TwitterCldr
|
|
100
100
|
def rule_set_from_resource(rule_set_data)
|
101
101
|
RuleSet.new(
|
102
102
|
rule_set_data[:rules].map do |rule|
|
103
|
-
binding.pry unless rule[:rule].is_a?(String)
|
104
103
|
Rule.new(rule[:value], rule[:rule], rule[:radix])
|
105
104
|
end,
|
106
105
|
rule_set_data[:type],
|
@@ -7,6 +7,22 @@ module TwitterCldr
|
|
7
7
|
module Tokenizers
|
8
8
|
class NumberTokenizer
|
9
9
|
|
10
|
+
SPECIAL_SYMBOLS_MAP = {
|
11
|
+
'.' => '{DOT}',
|
12
|
+
',' => '{COMMA}',
|
13
|
+
'0' => '{ZERO}',
|
14
|
+
'#' => '{POUND}',
|
15
|
+
'¤' => '{CURRENCY}',
|
16
|
+
'%' => '{PERCENT}',
|
17
|
+
'E' => '{SCIENTIFIC}'
|
18
|
+
}
|
19
|
+
|
20
|
+
SPECIAL_SYMBOLS_REGEX = /'(?:#{SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')})'/
|
21
|
+
|
22
|
+
INVERSE_SPECIAL_SYMBOLS_MAP = SPECIAL_SYMBOLS_MAP.invert
|
23
|
+
|
24
|
+
INVERSE_SPECIAL_SYMBOLS_REGEX = /#{INVERSE_SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')}/
|
25
|
+
|
10
26
|
attr_reader :data_reader
|
11
27
|
|
12
28
|
def initialize(data_reader)
|
@@ -14,7 +30,17 @@ module TwitterCldr
|
|
14
30
|
end
|
15
31
|
|
16
32
|
def tokenize(pattern)
|
17
|
-
|
33
|
+
escaped_pattern = pattern.gsub(SPECIAL_SYMBOLS_REGEX) do |match|
|
34
|
+
SPECIAL_SYMBOLS_MAP[match[1..-2]]
|
35
|
+
end
|
36
|
+
|
37
|
+
tokens = PatternTokenizer.new(data_reader, tokenizer).tokenize(escaped_pattern)
|
38
|
+
|
39
|
+
tokens.each do |token|
|
40
|
+
token.value = token.value.gsub(INVERSE_SPECIAL_SYMBOLS_REGEX) do |match|
|
41
|
+
INVERSE_SPECIAL_SYMBOLS_MAP[match]
|
42
|
+
end
|
43
|
+
end
|
18
44
|
|
19
45
|
if tokens.first.value == ""
|
20
46
|
tokens[1..-1]
|
@@ -14,12 +14,17 @@ module TwitterCldr
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
+
def to_hash
|
18
|
+
{ :value => @value, :type => @type }
|
19
|
+
end
|
20
|
+
|
17
21
|
def to_s
|
18
22
|
@value
|
19
23
|
end
|
20
24
|
|
21
|
-
|
22
|
-
|
25
|
+
# overriding `to_s` also overrides `inspect`, so we have to redefine it manually
|
26
|
+
def inspect
|
27
|
+
"<#{self.class}: #{instance_variables.map {|v| "#{v}=#{instance_variable_get(v).inspect}" }.join(", ")}>"
|
23
28
|
end
|
24
29
|
end
|
25
30
|
end
|
data/lib/twitter_cldr/version.rb
CHANGED
@@ -67,4 +67,13 @@ describe ShortDecimalFormatter do
|
|
67
67
|
expect(format_number(number)).to match_normalized("9300万")
|
68
68
|
end
|
69
69
|
end
|
70
|
+
|
71
|
+
context "with Russian locale" do
|
72
|
+
let(:locale) { :ru }
|
73
|
+
|
74
|
+
it "formats a number with a literal period" do
|
75
|
+
number = 1_000
|
76
|
+
expect(format_number(number)).to match_normalized("1 тыс.")
|
77
|
+
end
|
78
|
+
end
|
70
79
|
end
|
@@ -31,6 +31,22 @@ describe NumberTokenizer do
|
|
31
31
|
check_token_list(got, expected)
|
32
32
|
end
|
33
33
|
|
34
|
+
it "gets tokens for an abbreviated number pattern with a literal period (e.g., for Russian)" do
|
35
|
+
data_reader = TwitterCldr::DataReaders::NumberDataReader.new(:ru, :type => :short_decimal)
|
36
|
+
pattern = data_reader.pattern(1_000)
|
37
|
+
|
38
|
+
expect(pattern).to include("тыс'.'") # ensure that we test with the data we expect
|
39
|
+
|
40
|
+
got = data_reader.tokenizer.tokenize(pattern)
|
41
|
+
|
42
|
+
expected = [
|
43
|
+
{ :value => "", :type => :plaintext },
|
44
|
+
{ :value => "0", :type => :pattern },
|
45
|
+
{ :value => " тыс.", :type => :plaintext }
|
46
|
+
]
|
47
|
+
check_token_list(got, expected)
|
48
|
+
end
|
49
|
+
|
34
50
|
it "correctly parses suffixes (i.e. Russian currency)" do
|
35
51
|
data_reader = TwitterCldr::DataReaders::NumberDataReader.new(:ru, :type => :currency)
|
36
52
|
got = data_reader.tokenizer.tokenize(data_reader.pattern(number))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_cldr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.4
|
4
|
+
version: 3.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cameron Dutro
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|