twitter_cldr 1.4.1 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/NOTICE +36 -2
- data/README.md +2 -2
- data/lib/twitter_cldr/collation/collator.rb +143 -0
- data/lib/twitter_cldr/collation/implicit_collation_elements.rb +188 -0
- data/lib/twitter_cldr/collation/sort_key.rb +199 -0
- data/lib/twitter_cldr/collation/trie.rb +73 -0
- data/lib/twitter_cldr/collation/trie_builder.rb +56 -0
- data/lib/twitter_cldr/collation.rb +14 -0
- data/lib/twitter_cldr/core_ext/localized_object.rb +3 -2
- data/lib/twitter_cldr/core_ext/string.rb +1 -1
- data/lib/twitter_cldr/formatters/calendars/datetime_formatter.rb +89 -72
- data/lib/twitter_cldr/normalization/base.rb +22 -0
- data/lib/twitter_cldr/normalization/hangul.rb +68 -0
- data/lib/twitter_cldr/{normalizers → normalization}/nfc.rb +2 -2
- data/lib/twitter_cldr/{normalizers → normalization}/nfd.rb +1 -1
- data/lib/twitter_cldr/{normalizers → normalization}/nfkc.rb +5 -17
- data/lib/twitter_cldr/{normalizers → normalization}/nfkd.rb +3 -18
- data/lib/twitter_cldr/normalization.rb +15 -0
- data/lib/twitter_cldr/shared/code_point.rb +5 -3
- data/lib/twitter_cldr/tokenizers/base.rb +15 -1
- data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +6 -1
- data/lib/twitter_cldr/utils/code_points.rb +1 -1
- data/lib/twitter_cldr/version.rb +2 -2
- data/lib/twitter_cldr.rb +9 -8
- data/resources/collation/FractionalUCA_SHORT.txt +41593 -0
- data/resources/locales/af/calendars.yml +164 -0
- data/resources/locales/af/languages.yml +173 -0
- data/resources/locales/af/numbers.yml +42 -0
- data/resources/locales/af/plurals.yml +2 -0
- data/resources/locales/af/units.yml +88 -0
- data/resources/locales/ar/calendars.yml +9 -0
- data/resources/locales/ar/numbers.yml +15 -2
- data/resources/locales/ca/calendars.yml +228 -0
- data/resources/locales/ca/languages.yml +510 -0
- data/resources/locales/ca/numbers.yml +43 -0
- data/resources/locales/ca/plurals.yml +2 -0
- data/resources/locales/ca/units.yml +93 -0
- data/resources/locales/cs/calendars.yml +229 -0
- data/resources/locales/cs/languages.yml +471 -0
- data/resources/locales/cs/numbers.yml +44 -0
- data/resources/locales/cs/plurals.yml +2 -0
- data/resources/locales/cs/units.yml +114 -0
- data/resources/locales/da/calendars.yml +10 -0
- data/resources/locales/da/numbers.yml +13 -0
- data/resources/locales/de/calendars.yml +9 -0
- data/resources/locales/de/numbers.yml +13 -0
- data/resources/locales/el/calendars.yml +227 -0
- data/resources/locales/el/languages.yml +519 -0
- data/resources/locales/el/numbers.yml +42 -0
- data/resources/locales/el/plurals.yml +2 -0
- data/resources/locales/el/units.yml +107 -0
- data/resources/locales/en/calendars.yml +10 -0
- data/resources/locales/en/numbers.yml +13 -0
- data/resources/locales/es/calendars.yml +9 -0
- data/resources/locales/es/numbers.yml +13 -0
- data/resources/locales/eu/calendars.yml +173 -0
- data/resources/locales/eu/languages.yml +161 -0
- data/resources/locales/eu/numbers.yml +43 -0
- data/resources/locales/eu/plurals.yml +2 -0
- data/resources/locales/eu/units.yml +91 -0
- data/resources/locales/fa/calendars.yml +10 -0
- data/resources/locales/fa/numbers.yml +13 -0
- data/resources/locales/fi/calendars.yml +10 -0
- data/resources/locales/fi/numbers.yml +14 -1
- data/resources/locales/fil/calendars.yml +8 -0
- data/resources/locales/fil/numbers.yml +13 -0
- data/resources/locales/fr/calendars.yml +9 -0
- data/resources/locales/fr/numbers.yml +14 -1
- data/resources/locales/he/calendars.yml +9 -0
- data/resources/locales/he/numbers.yml +13 -0
- data/resources/locales/hi/calendars.yml +8 -0
- data/resources/locales/hi/numbers.yml +13 -0
- data/resources/locales/hu/calendars.yml +10 -0
- data/resources/locales/hu/numbers.yml +15 -2
- data/resources/locales/id/calendars.yml +8 -0
- data/resources/locales/id/numbers.yml +16 -3
- data/resources/locales/it/calendars.yml +9 -0
- data/resources/locales/it/numbers.yml +13 -0
- data/resources/locales/ja/calendars.yml +9 -0
- data/resources/locales/ja/numbers.yml +13 -0
- data/resources/locales/ko/calendars.yml +9 -0
- data/resources/locales/ko/numbers.yml +13 -0
- data/resources/locales/ms/calendars.yml +8 -0
- data/resources/locales/ms/numbers.yml +16 -3
- data/resources/locales/nb/calendars.yml +234 -0
- data/resources/locales/{no → nb}/languages.yml +25 -4
- data/resources/locales/nb/numbers.yml +43 -0
- data/resources/locales/nb/plurals.yml +2 -0
- data/resources/locales/nb/units.yml +87 -0
- data/resources/locales/nl/calendars.yml +10 -0
- data/resources/locales/nl/numbers.yml +13 -0
- data/resources/locales/pl/calendars.yml +9 -0
- data/resources/locales/pl/numbers.yml +14 -1
- data/resources/locales/pt/calendars.yml +9 -0
- data/resources/locales/pt/numbers.yml +13 -0
- data/resources/locales/ru/calendars.yml +10 -0
- data/resources/locales/ru/numbers.yml +14 -1
- data/resources/locales/sv/calendars.yml +10 -0
- data/resources/locales/sv/numbers.yml +14 -1
- data/resources/locales/th/calendars.yml +67 -57
- data/resources/locales/th/numbers.yml +13 -0
- data/resources/locales/tr/calendars.yml +9 -0
- data/resources/locales/tr/numbers.yml +13 -0
- data/resources/locales/uk/calendars.yml +199 -0
- data/resources/locales/uk/languages.yml +519 -0
- data/resources/locales/uk/numbers.yml +45 -0
- data/resources/locales/uk/plurals.yml +2 -0
- data/resources/locales/uk/units.yml +135 -0
- data/resources/locales/ur/calendars.yml +9 -0
- data/resources/locales/ur/numbers.yml +13 -0
- data/resources/locales/zh/calendars.yml +8 -0
- data/resources/locales/zh/numbers.yml +13 -0
- data/resources/locales/zh-Hant/calendars.yml +8 -0
- data/resources/locales/zh-Hant/numbers.yml +16 -3
- data/resources/locales/zh-Hant/plurals.yml +2 -0
- data/resources/unicode_data/hangul_blocks.yml +21 -0
- data/spec/collation/CollationTest_CLDR_NON_IGNORABLE_Short.txt +714 -0
- data/spec/collation/collation_spec.rb +93 -0
- data/spec/collation/collator_spec.rb +117 -0
- data/spec/collation/implicit_collation_elements_spec.rb +24 -0
- data/spec/collation/sort_key_spec.rb +56 -0
- data/spec/collation/trie_builder_spec.rb +114 -0
- data/spec/collation/trie_spec.rb +97 -0
- data/spec/core_ext/calendars/datetime_spec.rb +5 -0
- data/spec/core_ext/calendars_spec.rb +34 -0
- data/spec/core_ext/numbers_spec.rb +39 -0
- data/spec/core_ext/string_spec.rb +4 -4
- data/spec/formatters/calendars/datetime_formatter_spec.rb +92 -2
- data/spec/{normalizers → normalization}/NormalizationTestShort.txt +0 -0
- data/spec/{normalizers → normalization}/base_spec.rb +1 -1
- data/spec/normalization/hangul_spec.rb +42 -0
- data/spec/{normalizers → normalization}/normalization_spec.rb +15 -16
- data/spec/readme_spec.rb +2 -2
- data/spec/shared/code_point_spec.rb +42 -30
- data/spec/shared/resources_spec.rb +30 -6
- data/spec/tokenizers/base_spec.rb +17 -0
- data/spec/twitter_cldr_spec.rb +1 -1
- metadata +71 -83
- data/lib/twitter_cldr/normalizers/base.rb +0 -34
- data/lib/twitter_cldr/normalizers.rb +0 -14
- data/resources/locales/no/calendars.yml +0 -127
- data/resources/locales/no/numbers.yml +0 -29
- data/resources/locales/no/plurals.yml +0 -1
- data/resources/unicode_data/blocks_hangul.yml +0 -46
- data/spec/normalizers/NormalizationTest.txt +0 -18431
@@ -0,0 +1,93 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
require 'open-uri'
|
9
|
+
require 'zip'
|
10
|
+
|
11
|
+
include TwitterCldr::Collation
|
12
|
+
|
13
|
+
describe 'Unicode Collation Algorithm' do
|
14
|
+
|
15
|
+
SHORT_COLLATION_TEST_PATH = File.join(File.dirname(__FILE__), 'CollationTest_CLDR_NON_IGNORABLE_Short.txt')
|
16
|
+
FULL_COLLATION_TEST_PATH = File.join(File.dirname(__FILE__), 'CollationTest_CLDR_NON_IGNORABLE.txt')
|
17
|
+
|
18
|
+
FULL_COLLATION_TEST_URL = 'http://unicode.org/Public/UCA/latest/CollationAuxiliary.zip'
|
19
|
+
|
20
|
+
it 'passes all the tests in CollationTest_CLDR_NON_IGNORABLE_Short.txt' do
|
21
|
+
run_test(SHORT_COLLATION_TEST_PATH)
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'passes all the tests in CollationTest_CLDR_NON_IGNORABLE.txt', :slow => true do
|
25
|
+
prepare_full_test
|
26
|
+
run_test(FULL_COLLATION_TEST_PATH)
|
27
|
+
end
|
28
|
+
|
29
|
+
def run_test(file_path)
|
30
|
+
collator = Collator.new
|
31
|
+
|
32
|
+
previous_sort_key = previous_code_points = previous_hex_code_points = nil
|
33
|
+
|
34
|
+
open(file_path, 'r:utf-8') do |file|
|
35
|
+
file.each do |line|
|
36
|
+
next unless /^([0-9A-F ]+);/ =~ line
|
37
|
+
|
38
|
+
current_code_points = $1.split
|
39
|
+
current_hex_code_points = current_code_points.map { |cp| cp.to_i(16) }
|
40
|
+
|
41
|
+
current_sort_key = collator.sort_key(current_code_points)
|
42
|
+
|
43
|
+
if previous_sort_key
|
44
|
+
result = (previous_sort_key <=> current_sort_key).nonzero? || (previous_hex_code_points <=> current_hex_code_points)
|
45
|
+
result.should(eq(-1), error_message(previous_code_points, previous_sort_key, current_code_points, current_sort_key))
|
46
|
+
end
|
47
|
+
|
48
|
+
previous_sort_key = current_sort_key
|
49
|
+
previous_code_points = current_code_points
|
50
|
+
previous_hex_code_points = current_hex_code_points
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
# Generates a descriptive error message for a test failure.
|
56
|
+
#
|
57
|
+
def error_message(previous_code_points, previous_sort_key, current_code_points, current_sort_key)
|
58
|
+
<<END
|
59
|
+
Expected previous code points sequence to sort before the current one.
|
60
|
+
|
61
|
+
previous:
|
62
|
+
code points - #{previous_code_points.join(' ')}
|
63
|
+
sort key - #{pretty_sort_key(previous_sort_key)}
|
64
|
+
current:
|
65
|
+
code points - #{current_code_points.join(' ')}
|
66
|
+
sort key - #{pretty_sort_key(current_sort_key)}
|
67
|
+
END
|
68
|
+
end
|
69
|
+
|
70
|
+
# Downloads full version of the test if necessary.
|
71
|
+
#
|
72
|
+
def prepare_full_test
|
73
|
+
return if File.file?(FULL_COLLATION_TEST_PATH)
|
74
|
+
|
75
|
+
print ' Downloading CollationAuxiliary.zip ... '
|
76
|
+
zip_file = Tempfile.new('CollationAuxiliary.zip')
|
77
|
+
zip_file.write(open(FULL_COLLATION_TEST_URL).read)
|
78
|
+
zip_file.close
|
79
|
+
|
80
|
+
print 'extracting CollationTest_CLDR_NON_IGNORABLE.txt ... '
|
81
|
+
Zip::ZipFile.open(zip_file.path) do |zip|
|
82
|
+
open(FULL_COLLATION_TEST_PATH, 'w') { |file| file.write(zip.read('CollationAuxiliary/CollationTest_CLDR_NON_IGNORABLE.txt')) }
|
83
|
+
end
|
84
|
+
zip_file.unlink
|
85
|
+
|
86
|
+
puts 'done.'
|
87
|
+
end
|
88
|
+
|
89
|
+
def pretty_sort_key(current_sort_key)
|
90
|
+
"[#{current_sort_key.map{ |byte| byte.to_s(16).upcase }.join(', ')}]"
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Collation
|
9
|
+
|
10
|
+
describe Collator do
|
11
|
+
|
12
|
+
before :each do
|
13
|
+
Collator.instance_variable_set(:@trie, nil)
|
14
|
+
end
|
15
|
+
|
16
|
+
after :all do
|
17
|
+
Collator.instance_variable_set(:@trie, nil)
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '.trie' do
|
21
|
+
it 'returns collation elements trie' do
|
22
|
+
mock(TrieBuilder).load_trie(Collator::FRACTIONAL_UCA_SHORT_RESOURCE) { 'trie' }
|
23
|
+
Collator.trie.should == 'trie'
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'loads the trie only once' do
|
27
|
+
mock(TrieBuilder).load_trie(Collator::FRACTIONAL_UCA_SHORT_RESOURCE) { 'trie' }
|
28
|
+
|
29
|
+
Collator.trie.object_id.should == Collator.trie.object_id
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe '#trie' do
|
34
|
+
it 'delegates to the class method' do
|
35
|
+
mock(Collator).trie { 'trie' }
|
36
|
+
Collator.new.trie.should == 'trie'
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'calls class method only once' do
|
40
|
+
mock(Collator).trie { 'trie' }
|
41
|
+
|
42
|
+
collator = Collator.new
|
43
|
+
collator.trie.object_id.should == collator.trie.object_id
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
describe '#sort_key' do
|
48
|
+
let(:collator) { Collator.new }
|
49
|
+
let(:string) { 'abc' }
|
50
|
+
let(:code_points_hex) { %w[0061 0062 0063] }
|
51
|
+
let(:code_points) { code_points_hex.map { |cp| cp.to_i(16) } }
|
52
|
+
let(:sort_key) { [9986, 10498, 11010, 0, 1282, 1282, 1282, 0, 1282, 1282, 1282] }
|
53
|
+
|
54
|
+
before(:each) { mock(collator).sort_key_for_code_points(code_points) { sort_key } }
|
55
|
+
|
56
|
+
it 'calculates sort key for a string' do
|
57
|
+
mock(TwitterCldr::Utils::CodePoints).from_string(string) { code_points_hex }
|
58
|
+
collator.sort_key(string).should == sort_key
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'calculates sort key for an array of code points (represented as hex strings)' do
|
62
|
+
dont_allow(TwitterCldr::Utils::CodePoints).from_string(string)
|
63
|
+
collator.sort_key(code_points_hex).should == sort_key
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
describe '#compare' do
|
68
|
+
let(:collator) { Collator.new }
|
69
|
+
let(:sort_key) { [1, 3, 8, 9] }
|
70
|
+
let(:another_sort_key) { [6, 8, 9, 2] }
|
71
|
+
|
72
|
+
it 'compares strings by sort keys' do
|
73
|
+
stub_sort_key(collator, 'foo', sort_key)
|
74
|
+
stub_sort_key(collator, 'bar', another_sort_key)
|
75
|
+
|
76
|
+
collator.compare('foo', 'bar').should == -1
|
77
|
+
collator.compare('bar', 'foo').should == 1
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'returns 0 without computing sort keys if strings are equal' do
|
81
|
+
dont_allow(collator).sort_key
|
82
|
+
|
83
|
+
collator.compare('foo', 'foo').should == 0
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'compares strings by code points if the sort keys are equal' do
|
87
|
+
stub(collator).sort_key { sort_key }
|
88
|
+
|
89
|
+
collator.compare('bar', 'foo').should == -1
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe '#sort' do
|
94
|
+
let(:collator) { Collator.new }
|
95
|
+
|
96
|
+
it 'sorts strings by sort keys' do
|
97
|
+
[['aaa', [1, 2, 3]], ['abc', [1, 3, 4]], ['bca', [2, 5, 9]]].each { |s, key| mock_sort_key(collator, s, key) }
|
98
|
+
|
99
|
+
collator.sort(%w[bca aaa abc]).should == %w[aaa abc bca]
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'sorts strings with equal sort keys by code points' do
|
103
|
+
[['aaa', [1, 2, 3]], ['abc', [1, 2, 3]], ['bca', [1, 2, 3]]].each { |s, key| mock_sort_key(collator, s, key) }
|
104
|
+
|
105
|
+
collator.sort(%w[bca abc aaa]).should == %w[aaa abc bca]
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
def mock_sort_key(collator, string, sort_key)
|
110
|
+
mock(collator).sort_key(TwitterCldr::Utils::CodePoints.from_string(string)) { sort_key }
|
111
|
+
end
|
112
|
+
|
113
|
+
def stub_sort_key(collator, string, sort_key)
|
114
|
+
stub(collator).sort_key(TwitterCldr::Utils::CodePoints.from_string(string)) { sort_key }
|
115
|
+
end
|
116
|
+
|
117
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Collation
|
9
|
+
|
10
|
+
describe ImplicitCollationElements do
|
11
|
+
|
12
|
+
it 'computes correct implicit value for non-CJK code points' do
|
13
|
+
ImplicitCollationElements.for_code_point(0xD801).should == [[0xE305C758, 0x5, 0x5]]
|
14
|
+
ImplicitCollationElements.for_code_point(0xC0001).should == [[0xE44E70AC, 0x5, 0x5]]
|
15
|
+
ImplicitCollationElements.for_code_point(0xFFF02).should == [[0xE4C25F74, 0x5, 0x5]]
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'computes correct implicit values for CJK code points' do
|
19
|
+
ImplicitCollationElements.for_code_point(0x4E00).should == [[0xE00406, 0x5, 0x5]]
|
20
|
+
ImplicitCollationElements.for_code_point(0x3400).should == [[0xE0ABCE, 0x5, 0x5]]
|
21
|
+
ImplicitCollationElements.for_code_point(0x20000).should == [[0xE1302590, 0x5, 0x5]]
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Collation
|
9
|
+
|
10
|
+
describe SortKey do
|
11
|
+
|
12
|
+
let(:sort_key) { SortKey.new(collation_elements) }
|
13
|
+
let(:collation_elements) { [[63, 13, 149], [66, 81, 143]] }
|
14
|
+
let(:sort_key_bytes) { [63, 66, 1, 13, 81, 1, 149, 143] }
|
15
|
+
|
16
|
+
describe '.build' do
|
17
|
+
it 'returns a sort key for a given array of collation elements' do
|
18
|
+
sort_key = SortKey.new(collation_elements)
|
19
|
+
|
20
|
+
mock(SortKey).new(collation_elements) { sort_key }
|
21
|
+
mock(sort_key).bytes_array { sort_key_bytes }
|
22
|
+
|
23
|
+
SortKey.build(collation_elements).should == sort_key_bytes
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe '#initialize' do
|
28
|
+
it 'assigns collation elements array' do
|
29
|
+
SortKey.new(collation_elements).collation_elements.should == collation_elements
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe '#bytes_array' do
|
34
|
+
it 'builds sort key bytes' do
|
35
|
+
sort_key.bytes_array.should == sort_key_bytes
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'builds bytes array only once' do
|
39
|
+
mock(sort_key).build_bytes_array { sort_key_bytes }
|
40
|
+
sort_key.bytes_array.object_id.should == sort_key.bytes_array.object_id # assert memoization: both calls must return the same object
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'compresses secondary weights' do
|
44
|
+
SortKey.new([[0, 5, 0], [0, 5, 0], [0, 141, 0], [0, 5, 0], [0, 5, 0]]).bytes_array.should == [1, 133, 141, 6, 1]
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'compresses tertiary weights' do
|
48
|
+
SortKey.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]]).bytes_array.should == [1, 1, 132, 167, 6]
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'compresses secondary and tertiary weights into multiple bytes if necessary' do
|
52
|
+
SortKey.new([[39, 5, 5]] * 100).bytes_array.should == [39] * 100 + [1, 69, 40, 1, 48, 48, 18]
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Collation
|
9
|
+
|
10
|
+
describe TrieBuilder do
|
11
|
+
|
12
|
+
describe '#build' do
|
13
|
+
describe 'fractional CE trie hash' do
|
14
|
+
let(:trie_builder) do
|
15
|
+
builder = TrieBuilder.new('resource')
|
16
|
+
stub(builder).load_collation_elements_table { FRACTIONAL_UCA_SHORT_STUB }
|
17
|
+
builder
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'returns a trie' do
|
21
|
+
trie_builder.build.should be_a(Trie) # assert on the result of #build, not on the builder itself
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'adds every collation element from the FractionalUCA_SHORT.txt file to the trie' do
|
25
|
+
mock(Trie).new { TrieStub.new }
|
26
|
+
|
27
|
+
trie_builder.build.storage.should == COLLATION_ELEMENTS_TABLE
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
class TrieStub
|
35
|
+
attr_accessor :storage
|
36
|
+
|
37
|
+
def initialize
|
38
|
+
self.storage = []
|
39
|
+
end
|
40
|
+
|
41
|
+
def add(code_points, collation_element)
|
42
|
+
storage << [code_points, collation_element]
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
FRACTIONAL_UCA_SHORT_STUB = <<END
|
47
|
+
# Fractional UCA Table, generated from standard UCA
|
48
|
+
# 2012-01-03, 21:52:55 GMT [MD]
|
49
|
+
# VERSION: UCA=6.1.0, UCD=6.1.0
|
50
|
+
# For a description of the format and usage, see CollationAuxiliary.html
|
51
|
+
|
52
|
+
[UCA version = 6.1.0]
|
53
|
+
|
54
|
+
0000; [,,]
|
55
|
+
030C; [, 97, 05]
|
56
|
+
215E; [20, 05, 3B][0D 75 2C, 05, 3B][22, 05, 3D]
|
57
|
+
FC63; [, D3 A9, 33][, D5 11, 33]
|
58
|
+
0E40 0E01; [72 0A, 05, 05][72 7E, 05, 3D]
|
59
|
+
0E40 0E02; [72 0C, 05, 05][72 7E, 05, 3D]
|
60
|
+
|
61
|
+
# HOMELESS COLLATION ELEMENTS
|
62
|
+
FDD0 0063; [, 97, 3D]
|
63
|
+
FDD0 0064; [, A7, 09]
|
64
|
+
|
65
|
+
# SPECIAL MAX/MIN COLLATION ELEMENTS
|
66
|
+
|
67
|
+
FFFE; [02, 02, 02] # Special LOWEST primary, for merge/interleaving
|
68
|
+
FFFF; [EF FE, 05, 05] # Special HIGHEST primary, for ranges
|
69
|
+
|
70
|
+
# Top Byte => Reordering Tokens
|
71
|
+
[top_byte 00 TERMINATOR ] # [0] TERMINATOR=1
|
72
|
+
[top_byte 01 LEVEL-SEPARATOR ] # [0] LEVEL-SEPARATOR=1
|
73
|
+
[top_byte 02 FIELD-SEPARATOR ] # [0] FIELD-SEPARATOR=1
|
74
|
+
[top_byte 03 SPACE ] # [9] SPACE=1 Cc=6 Zl=1 Zp=1 Zs=1
|
75
|
+
|
76
|
+
# VALUES BASED ON UCA
|
77
|
+
[first tertiary ignorable [,,]] # CONSTRUCTED
|
78
|
+
[last tertiary ignorable [,,]] # CONSTRUCTED
|
79
|
+
# Warning: Case bits are masked in the following
|
80
|
+
[first tertiary in secondary non-ignorable [X, X, 05]] # U+0332 COMBINING LOW LINE
|
81
|
+
[last tertiary in secondary non-ignorable [X, X, 3D]] # U+2A74 DOUBLE COLON EQUAL
|
82
|
+
END
|
83
|
+
|
84
|
+
COLLATION_ELEMENTS_TABLE = [
|
85
|
+
# 0000; [,,]
|
86
|
+
[[0], [[0, 0, 0]]],
|
87
|
+
|
88
|
+
# 030C; [, 97, 05]
|
89
|
+
[[780], [[0, 151, 5]]],
|
90
|
+
|
91
|
+
# 215E; [20, 05, 3B][0D 75 2C, 05, 3B][22, 05, 3D]
|
92
|
+
[[8542], [[32, 5, 59], [881964, 5, 59], [34, 5, 61]]],
|
93
|
+
|
94
|
+
# FC63; [, D3 A9, 33][, D5 11, 33]
|
95
|
+
[[64611], [[0, 54185, 51], [0, 54545, 51]]],
|
96
|
+
|
97
|
+
# 0E40 0E01; [72 0A, 05, 05][72 7E, 05, 3D]
|
98
|
+
[[3648, 3585], [[29194, 5, 5], [29310, 5, 61]]],
|
99
|
+
|
100
|
+
# 0E40 0E02; [72 0C, 05, 05][72 7E, 05, 3D]
|
101
|
+
[[3648, 3586], [[29196, 5, 5], [29310, 5, 61]]],
|
102
|
+
|
103
|
+
# FDD0 0063; [, 97, 3D]
|
104
|
+
[[64976, 99], [[0, 151, 61]]],
|
105
|
+
|
106
|
+
# FDD0 0064; [, A7, 09]
|
107
|
+
[[64976, 100], [[0, 167, 9]]],
|
108
|
+
|
109
|
+
# FFFE; [02, 02, 02]
|
110
|
+
[[65534], [[2, 2, 2]]],
|
111
|
+
|
112
|
+
# FFFF; [EF FE, 05, 05]
|
113
|
+
[[65535], [[61438, 5, 5]]]
|
114
|
+
]
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Collation
|
9
|
+
|
10
|
+
describe Trie do
|
11
|
+
|
12
|
+
let(:trie) { Trie.new }
|
13
|
+
|
14
|
+
let(:values) do
|
15
|
+
[
|
16
|
+
[[1], '1' ],
|
17
|
+
[[1, 4], '14' ],
|
18
|
+
[[1, 5], '15' ],
|
19
|
+
[[1, 4, 8], '148'],
|
20
|
+
[[2], '2' ],
|
21
|
+
[[2, 7, 5], '275'],
|
22
|
+
[[3, 9], '39' ]
|
23
|
+
]
|
24
|
+
end
|
25
|
+
|
26
|
+
before(:each) do
|
27
|
+
values.each { |key, value| trie.add(key, value) }
|
28
|
+
end
|
29
|
+
|
30
|
+
describe '#get' do
|
31
|
+
it 'returns nil for non existing keys' do
|
32
|
+
[[6], [3], [1, 4, 3], [2, 7, 5, 6, 9]].each { |key| trie.get(key).should be_nil }
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'returns value and key size for each existing key' do
|
36
|
+
values.each { |key, value| trie.get(key).should == value }
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
describe '#add' do
|
41
|
+
it 'overrides values' do
|
42
|
+
trie.get([1, 4]).should == '14'
|
43
|
+
|
44
|
+
trie.add([1, 4], '14-new')
|
45
|
+
trie.get([1, 4]).should == '14-new'
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe '#find_prefix' do
|
50
|
+
describe 'first (value) and third (prefix size) elements of the returned array' do
|
51
|
+
it 'value is 0 nil and prefix size is 0 if the prefix was not found' do
|
52
|
+
test_find_prefix(trie, [4], nil, 0)
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'stored value and key size as a prefix size if the whole key was found' do
|
56
|
+
values.each do |key, value|
|
57
|
+
test_find_prefix(trie, key, value)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'stored value and size of the corresponding prefix if only part of the key was found' do
|
62
|
+
tests = {
|
63
|
+
[1, 9] => ['1', 1],
|
64
|
+
[1, 4, 2] => ['14', 2],
|
65
|
+
[1, 4, 8, 9, 2] => ['148', 3],
|
66
|
+
[2, 7, 5, 5] => ['275', 3]
|
67
|
+
}
|
68
|
+
|
69
|
+
tests.each { |key, result| test_find_prefix(trie, key, *result) }
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_find_prefix(trie, key, value, size = key.size)
|
73
|
+
result = trie.find_prefix(key)
|
74
|
+
|
75
|
+
result[0].should == value
|
76
|
+
result[2].should == size
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
describe 'second (subtrie) element of the returned array' do
|
81
|
+
it 'is a hash of possible suffixes for the prefix that was found' do
|
82
|
+
trie.find_prefix([1, 4, 8])[1].should == {}
|
83
|
+
trie.find_prefix([2, 7])[1].should == { 5 => ["275", { }] }
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'is a hash representing the whole trie if the prefix was not found' do
|
87
|
+
trie.find_prefix([404])[1].should == {
|
88
|
+
1 => ['1', { 4 => ['14', { 8 => ['148', {}] }], 5 => ['15', {}] }],
|
89
|
+
2 => ['2', { 7 => [nil, { 5 => ['275', {}] }] }],
|
90
|
+
3 => [nil, { 9 => ['39', {}] }]
|
91
|
+
}
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
@@ -35,6 +35,11 @@ describe DateTime do
|
|
35
35
|
loc_date.to_date.calendar_type.should == :buddhist
|
36
36
|
loc_date.to_time.calendar_type.should == :buddhist
|
37
37
|
end
|
38
|
+
|
39
|
+
it "should default to English if the given locale isn't supported" do
|
40
|
+
loc_date = date.localize(:xx)
|
41
|
+
loc_date.locale.should == :en
|
42
|
+
end
|
38
43
|
end
|
39
44
|
|
40
45
|
describe "stringify" do
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Tokenizers
|
9
|
+
|
10
|
+
describe "Calendars" do
|
11
|
+
it "makes sure datetime formatters for every locale don't raise errors" do
|
12
|
+
TwitterCldr.supported_locales.each do |locale|
|
13
|
+
DateTimeTokenizer::VALID_TYPES.each do |type|
|
14
|
+
lambda { DateTime.now.localize(locale).send(:"to_#{type}_s") }.should_not raise_error
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
it "makes sure date formatters for every locale don't raise errors" do
|
20
|
+
TwitterCldr.supported_locales.each do |locale|
|
21
|
+
DateTimeTokenizer::VALID_TYPES.each do |type|
|
22
|
+
lambda { Date.today.localize(locale).send(:"to_#{type}_s") }.should_not raise_error
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
it "makes sure time formatters for every locale don't raise errors" do
|
28
|
+
TwitterCldr.supported_locales.each do |locale|
|
29
|
+
DateTimeTokenizer::VALID_TYPES.each do |type|
|
30
|
+
lambda { Time.now.localize(locale).send(:"to_#{type}_s") }.should_not raise_error
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr
|
9
|
+
|
10
|
+
describe "Numbers" do
|
11
|
+
it "makes sure currency formatters for every locale don't raise errors" do
|
12
|
+
TwitterCldr.supported_locales.each do |locale|
|
13
|
+
lambda { 1337.localize(locale).to_currency.to_s }.should_not raise_error
|
14
|
+
lambda { 1337.localize(locale).to_currency.to_s(:precision => 3) }.should_not raise_error
|
15
|
+
lambda { 1337.localize(locale).to_currency.to_s(:precision => 3, :currency => "EUR") }.should_not raise_error
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
it "makes sure decimal formatters for every locale don't raise errors" do
|
20
|
+
TwitterCldr.supported_locales.each do |locale|
|
21
|
+
lambda { 1337.localize(locale).to_decimal.to_s }.should_not raise_error
|
22
|
+
lambda { 1337.localize(locale).to_decimal.to_s(:precision => 3) }.should_not raise_error
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
it "makes sure percentage formatters for every locale don't raise errors" do
|
27
|
+
TwitterCldr.supported_locales.each do |locale|
|
28
|
+
lambda { 1337.localize(locale).to_percent.to_s }.should_not raise_error
|
29
|
+
lambda { 1337.localize(locale).to_percent.to_s(:precision => 3) }.should_not raise_error
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
it "makes sure basic number formatters for every locale don't raise errors" do
|
34
|
+
TwitterCldr.supported_locales.each do |locale|
|
35
|
+
lambda { 1337.localize(locale).to_s }.should_not raise_error
|
36
|
+
lambda { 1337.localize(locale).to_s(:precision => 3) }.should_not raise_error
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -15,8 +15,8 @@ describe String do
|
|
15
15
|
end
|
16
16
|
|
17
17
|
it "uses default locale if it's not explicitly specified" do
|
18
|
-
mock(TwitterCldr).get_locale { :
|
19
|
-
'foo'.localize.locale.should == :
|
18
|
+
mock(TwitterCldr).get_locale { :ja }
|
19
|
+
'foo'.localize.locale.should == :ja
|
20
20
|
end
|
21
21
|
|
22
22
|
it 'uses provided locale if there is one' do
|
@@ -111,7 +111,7 @@ describe LocalizedString do
|
|
111
111
|
|
112
112
|
describe "#normalize" do
|
113
113
|
it "returns a normalized instance of LocalizedString, defaults to NFD" do
|
114
|
-
mock.proxy(TwitterCldr::
|
114
|
+
mock.proxy(TwitterCldr::Normalization::NFD).normalize("español")
|
115
115
|
"español".bytes.to_a.should == [101, 115, 112, 97, 195, 177, 111, 108]
|
116
116
|
result = "español".localize.normalize
|
117
117
|
result.should be_a(LocalizedString)
|
@@ -119,7 +119,7 @@ describe LocalizedString do
|
|
119
119
|
end
|
120
120
|
|
121
121
|
it "returns a normalized instance of LocalizedString using the specified algorithm" do
|
122
|
-
mock.proxy(TwitterCldr::
|
122
|
+
mock.proxy(TwitterCldr::Normalization::NFKD).normalize("español")
|
123
123
|
"español".bytes.to_a.should == [101, 115, 112, 97, 195, 177, 111, 108]
|
124
124
|
result = "español".localize.normalize(:using => :NFKD)
|
125
125
|
result.should be_a(LocalizedString)
|