twitter_cldr 1.4.1 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. data/NOTICE +36 -2
  2. data/README.md +2 -2
  3. data/lib/twitter_cldr/collation/collator.rb +143 -0
  4. data/lib/twitter_cldr/collation/implicit_collation_elements.rb +188 -0
  5. data/lib/twitter_cldr/collation/sort_key.rb +199 -0
  6. data/lib/twitter_cldr/collation/trie.rb +73 -0
  7. data/lib/twitter_cldr/collation/trie_builder.rb +56 -0
  8. data/lib/twitter_cldr/collation.rb +14 -0
  9. data/lib/twitter_cldr/core_ext/localized_object.rb +3 -2
  10. data/lib/twitter_cldr/core_ext/string.rb +1 -1
  11. data/lib/twitter_cldr/formatters/calendars/datetime_formatter.rb +89 -72
  12. data/lib/twitter_cldr/normalization/base.rb +22 -0
  13. data/lib/twitter_cldr/normalization/hangul.rb +68 -0
  14. data/lib/twitter_cldr/{normalizers → normalization}/nfc.rb +2 -2
  15. data/lib/twitter_cldr/{normalizers → normalization}/nfd.rb +1 -1
  16. data/lib/twitter_cldr/{normalizers → normalization}/nfkc.rb +5 -17
  17. data/lib/twitter_cldr/{normalizers → normalization}/nfkd.rb +3 -18
  18. data/lib/twitter_cldr/normalization.rb +15 -0
  19. data/lib/twitter_cldr/shared/code_point.rb +5 -3
  20. data/lib/twitter_cldr/tokenizers/base.rb +15 -1
  21. data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +6 -1
  22. data/lib/twitter_cldr/utils/code_points.rb +1 -1
  23. data/lib/twitter_cldr/version.rb +2 -2
  24. data/lib/twitter_cldr.rb +9 -8
  25. data/resources/collation/FractionalUCA_SHORT.txt +41593 -0
  26. data/resources/locales/af/calendars.yml +164 -0
  27. data/resources/locales/af/languages.yml +173 -0
  28. data/resources/locales/af/numbers.yml +42 -0
  29. data/resources/locales/af/plurals.yml +2 -0
  30. data/resources/locales/af/units.yml +88 -0
  31. data/resources/locales/ar/calendars.yml +9 -0
  32. data/resources/locales/ar/numbers.yml +15 -2
  33. data/resources/locales/ca/calendars.yml +228 -0
  34. data/resources/locales/ca/languages.yml +510 -0
  35. data/resources/locales/ca/numbers.yml +43 -0
  36. data/resources/locales/ca/plurals.yml +2 -0
  37. data/resources/locales/ca/units.yml +93 -0
  38. data/resources/locales/cs/calendars.yml +229 -0
  39. data/resources/locales/cs/languages.yml +471 -0
  40. data/resources/locales/cs/numbers.yml +44 -0
  41. data/resources/locales/cs/plurals.yml +2 -0
  42. data/resources/locales/cs/units.yml +114 -0
  43. data/resources/locales/da/calendars.yml +10 -0
  44. data/resources/locales/da/numbers.yml +13 -0
  45. data/resources/locales/de/calendars.yml +9 -0
  46. data/resources/locales/de/numbers.yml +13 -0
  47. data/resources/locales/el/calendars.yml +227 -0
  48. data/resources/locales/el/languages.yml +519 -0
  49. data/resources/locales/el/numbers.yml +42 -0
  50. data/resources/locales/el/plurals.yml +2 -0
  51. data/resources/locales/el/units.yml +107 -0
  52. data/resources/locales/en/calendars.yml +10 -0
  53. data/resources/locales/en/numbers.yml +13 -0
  54. data/resources/locales/es/calendars.yml +9 -0
  55. data/resources/locales/es/numbers.yml +13 -0
  56. data/resources/locales/eu/calendars.yml +173 -0
  57. data/resources/locales/eu/languages.yml +161 -0
  58. data/resources/locales/eu/numbers.yml +43 -0
  59. data/resources/locales/eu/plurals.yml +2 -0
  60. data/resources/locales/eu/units.yml +91 -0
  61. data/resources/locales/fa/calendars.yml +10 -0
  62. data/resources/locales/fa/numbers.yml +13 -0
  63. data/resources/locales/fi/calendars.yml +10 -0
  64. data/resources/locales/fi/numbers.yml +14 -1
  65. data/resources/locales/fil/calendars.yml +8 -0
  66. data/resources/locales/fil/numbers.yml +13 -0
  67. data/resources/locales/fr/calendars.yml +9 -0
  68. data/resources/locales/fr/numbers.yml +14 -1
  69. data/resources/locales/he/calendars.yml +9 -0
  70. data/resources/locales/he/numbers.yml +13 -0
  71. data/resources/locales/hi/calendars.yml +8 -0
  72. data/resources/locales/hi/numbers.yml +13 -0
  73. data/resources/locales/hu/calendars.yml +10 -0
  74. data/resources/locales/hu/numbers.yml +15 -2
  75. data/resources/locales/id/calendars.yml +8 -0
  76. data/resources/locales/id/numbers.yml +16 -3
  77. data/resources/locales/it/calendars.yml +9 -0
  78. data/resources/locales/it/numbers.yml +13 -0
  79. data/resources/locales/ja/calendars.yml +9 -0
  80. data/resources/locales/ja/numbers.yml +13 -0
  81. data/resources/locales/ko/calendars.yml +9 -0
  82. data/resources/locales/ko/numbers.yml +13 -0
  83. data/resources/locales/ms/calendars.yml +8 -0
  84. data/resources/locales/ms/numbers.yml +16 -3
  85. data/resources/locales/nb/calendars.yml +234 -0
  86. data/resources/locales/{no → nb}/languages.yml +25 -4
  87. data/resources/locales/nb/numbers.yml +43 -0
  88. data/resources/locales/nb/plurals.yml +2 -0
  89. data/resources/locales/nb/units.yml +87 -0
  90. data/resources/locales/nl/calendars.yml +10 -0
  91. data/resources/locales/nl/numbers.yml +13 -0
  92. data/resources/locales/pl/calendars.yml +9 -0
  93. data/resources/locales/pl/numbers.yml +14 -1
  94. data/resources/locales/pt/calendars.yml +9 -0
  95. data/resources/locales/pt/numbers.yml +13 -0
  96. data/resources/locales/ru/calendars.yml +10 -0
  97. data/resources/locales/ru/numbers.yml +14 -1
  98. data/resources/locales/sv/calendars.yml +10 -0
  99. data/resources/locales/sv/numbers.yml +14 -1
  100. data/resources/locales/th/calendars.yml +67 -57
  101. data/resources/locales/th/numbers.yml +13 -0
  102. data/resources/locales/tr/calendars.yml +9 -0
  103. data/resources/locales/tr/numbers.yml +13 -0
  104. data/resources/locales/uk/calendars.yml +199 -0
  105. data/resources/locales/uk/languages.yml +519 -0
  106. data/resources/locales/uk/numbers.yml +45 -0
  107. data/resources/locales/uk/plurals.yml +2 -0
  108. data/resources/locales/uk/units.yml +135 -0
  109. data/resources/locales/ur/calendars.yml +9 -0
  110. data/resources/locales/ur/numbers.yml +13 -0
  111. data/resources/locales/zh/calendars.yml +8 -0
  112. data/resources/locales/zh/numbers.yml +13 -0
  113. data/resources/locales/zh-Hant/calendars.yml +8 -0
  114. data/resources/locales/zh-Hant/numbers.yml +16 -3
  115. data/resources/locales/zh-Hant/plurals.yml +2 -0
  116. data/resources/unicode_data/hangul_blocks.yml +21 -0
  117. data/spec/collation/CollationTest_CLDR_NON_IGNORABLE_Short.txt +714 -0
  118. data/spec/collation/collation_spec.rb +93 -0
  119. data/spec/collation/collator_spec.rb +117 -0
  120. data/spec/collation/implicit_collation_elements_spec.rb +24 -0
  121. data/spec/collation/sort_key_spec.rb +56 -0
  122. data/spec/collation/trie_builder_spec.rb +114 -0
  123. data/spec/collation/trie_spec.rb +97 -0
  124. data/spec/core_ext/calendars/datetime_spec.rb +5 -0
  125. data/spec/core_ext/calendars_spec.rb +34 -0
  126. data/spec/core_ext/numbers_spec.rb +39 -0
  127. data/spec/core_ext/string_spec.rb +4 -4
  128. data/spec/formatters/calendars/datetime_formatter_spec.rb +92 -2
  129. data/spec/{normalizers → normalization}/NormalizationTestShort.txt +0 -0
  130. data/spec/{normalizers → normalization}/base_spec.rb +1 -1
  131. data/spec/normalization/hangul_spec.rb +42 -0
  132. data/spec/{normalizers → normalization}/normalization_spec.rb +15 -16
  133. data/spec/readme_spec.rb +2 -2
  134. data/spec/shared/code_point_spec.rb +42 -30
  135. data/spec/shared/resources_spec.rb +30 -6
  136. data/spec/tokenizers/base_spec.rb +17 -0
  137. data/spec/twitter_cldr_spec.rb +1 -1
  138. metadata +71 -83
  139. data/lib/twitter_cldr/normalizers/base.rb +0 -34
  140. data/lib/twitter_cldr/normalizers.rb +0 -14
  141. data/resources/locales/no/calendars.yml +0 -127
  142. data/resources/locales/no/numbers.yml +0 -29
  143. data/resources/locales/no/plurals.yml +0 -1
  144. data/resources/unicode_data/blocks_hangul.yml +0 -46
  145. data/spec/normalizers/NormalizationTest.txt +0 -18431
@@ -0,0 +1,93 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ require 'open-uri'
9
+ require 'zip'
10
+
11
+ include TwitterCldr::Collation
12
+
13
+ describe 'Unicode Collation Algorithm' do
14
+
15
+ SHORT_COLLATION_TEST_PATH = File.join(File.dirname(__FILE__), 'CollationTest_CLDR_NON_IGNORABLE_Short.txt')
16
+ FULL_COLLATION_TEST_PATH = File.join(File.dirname(__FILE__), 'CollationTest_CLDR_NON_IGNORABLE.txt')
17
+
18
+ FULL_COLLATION_TEST_URL = 'http://unicode.org/Public/UCA/latest/CollationAuxiliary.zip'
19
+
20
+ it 'passes all the tests in CollationTest_CLDR_NON_IGNORABLE_Short.txt' do
21
+ run_test(SHORT_COLLATION_TEST_PATH)
22
+ end
23
+
24
+ it 'passes all the tests in CollationTest_CLDR_NON_IGNORABLE.txt', :slow => true do
25
+ prepare_full_test
26
+ run_test(FULL_COLLATION_TEST_PATH)
27
+ end
28
+
29
+ def run_test(file_path)
30
+ collator = Collator.new
31
+
32
+ previous_sort_key = previous_code_points = previous_hex_code_points = nil
33
+
34
+ open(file_path, 'r:utf-8') do |file|
35
+ file.each do |line|
36
+ next unless /^([0-9A-F ]+);/ =~ line
37
+
38
+ current_code_points = $1.split
39
+ current_hex_code_points = current_code_points.map { |cp| cp.to_i(16) }
40
+
41
+ current_sort_key = collator.sort_key(current_code_points)
42
+
43
+ if previous_sort_key
44
+ result = (previous_sort_key <=> current_sort_key).nonzero? || (previous_hex_code_points <=> current_hex_code_points)
45
+ result.should(eq(-1), error_message(previous_code_points, previous_sort_key, current_code_points, current_sort_key))
46
+ end
47
+
48
+ previous_sort_key = current_sort_key
49
+ previous_code_points = current_code_points
50
+ previous_hex_code_points = current_hex_code_points
51
+ end
52
+ end
53
+ end
54
+
55
+ # Generates a descriptive error message for a test failure.
56
+ #
57
+ def error_message(previous_code_points, previous_sort_key, current_code_points, current_sort_key)
58
+ <<END
59
+ Expected previous code points sequence to sort before the current one.
60
+
61
+ previous:
62
+ code points - #{previous_code_points.join(' ')}
63
+ sort key - #{pretty_sort_key(previous_sort_key)}
64
+ current:
65
+ code points - #{current_code_points.join(' ')}
66
+ sort key - #{pretty_sort_key(current_sort_key)}
67
+ END
68
+ end
69
+
70
+ # Downloads full version of the test if necessary.
71
+ #
72
+ def prepare_full_test
73
+ return if File.file?(FULL_COLLATION_TEST_PATH)
74
+
75
+ print ' Downloading CollationAuxillary.zip ... '
76
+ zip_file = Tempfile.new('CollationAuxillary.zip')
77
+ zip_file.write(open(FULL_COLLATION_TEST_URL).read)
78
+ zip_file.close
79
+
80
+ print 'extracting CollationTest_CLDR_NON_IGNORABLE.txt ... '
81
+ Zip::ZipFile.open(zip_file.path) do |zip|
82
+ open(FULL_COLLATION_TEST_PATH, 'w') { |file| file.write(zip.read('CollationAuxiliary/CollationTest_CLDR_NON_IGNORABLE.txt')) }
83
+ end
84
+ zip_file.unlink
85
+
86
+ puts 'done.'
87
+ end
88
+
89
+ def pretty_sort_key(current_sort_key)
90
+ "[#{current_sort_key.map{ |byte| byte.to_s(16).upcase }.join(', ')}]"
91
+ end
92
+
93
+ end
@@ -0,0 +1,117 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ include TwitterCldr::Collation
9
+
10
+ describe Collator do
11
+
12
+ before :each do
13
+ Collator.instance_variable_set(:@trie, nil)
14
+ end
15
+
16
+ after :all do
17
+ Collator.instance_variable_set(:@trie, nil)
18
+ end
19
+
20
+ describe '.trie' do
21
+ it 'returns collation elements trie' do
22
+ mock(TrieBuilder).load_trie(Collator::FRACTIONAL_UCA_SHORT_RESOURCE) { 'trie' }
23
+ Collator.trie.should == 'trie'
24
+ end
25
+
26
+ it 'loads the trie only once' do
27
+ mock(TrieBuilder).load_trie(Collator::FRACTIONAL_UCA_SHORT_RESOURCE) { 'trie' }
28
+
29
+ Collator.trie.object_id.should == Collator.trie.object_id
30
+ end
31
+ end
32
+
33
+ describe '#trie' do
34
+ it 'delegates to the class method' do
35
+ mock(Collator).trie { 'trie' }
36
+ Collator.new.trie.should == 'trie'
37
+ end
38
+
39
+ it 'calls class method only once' do
40
+ mock(Collator).trie { 'trie' }
41
+
42
+ collator = Collator.new
43
+ collator.trie.object_id.should == collator.trie.object_id
44
+ end
45
+ end
46
+
47
+ describe '#sort_key' do
48
+ let(:collator) { Collator.new }
49
+ let(:string) { 'abc' }
50
+ let(:code_points_hex) { %w[0061 0062 0063] }
51
+ let(:code_points) { code_points_hex.map { |cp| cp.to_i(16) } }
52
+ let(:sort_key) { [9986, 10498, 11010, 0, 1282, 1282, 1282, 0, 1282, 1282, 1282] }
53
+
54
+ before(:each) { mock(collator).sort_key_for_code_points(code_points) { sort_key } }
55
+
56
+ it 'calculates sort key for a string' do
57
+ mock(TwitterCldr::Utils::CodePoints).from_string(string) { code_points_hex }
58
+ collator.sort_key(string).should == sort_key
59
+ end
60
+
61
+ it 'calculates sort key for an array of code points (represented as hex strings)' do
62
+ dont_allow(TwitterCldr::Utils::CodePoints).from_string(string)
63
+ collator.sort_key(code_points_hex).should == sort_key
64
+ end
65
+ end
66
+
67
+ describe '#compare' do
68
+ let(:collator) { Collator.new }
69
+ let(:sort_key) { [1, 3, 8, 9] }
70
+ let(:another_sort_key) { [6, 8, 9, 2] }
71
+
72
+ it 'compares strings by sort keys' do
73
+ stub_sort_key(collator, 'foo', sort_key)
74
+ stub_sort_key(collator, 'bar', another_sort_key)
75
+
76
+ collator.compare('foo', 'bar').should == -1
77
+ collator.compare('bar', 'foo').should == 1
78
+ end
79
+
80
+ it 'returns 0 without computing sort keys if strings are equal' do
81
+ dont_allow(collator).sort_key
82
+
83
+ collator.compare('foo', 'foo').should == 0
84
+ end
85
+
86
+ it 'compares strings by code points if the sort keys are equal' do
87
+ stub(collator).sort_key { sort_key }
88
+
89
+ collator.compare('bar', 'foo').should == -1
90
+ end
91
+ end
92
+
93
+ describe '#sort' do
94
+ let(:collator) { Collator.new }
95
+
96
+ it 'sorts strings by sort keys' do
97
+ [['aaa', [1, 2, 3]], ['abc', [1, 3, 4]], ['bca', [2, 5, 9]]].each { |s, key| mock_sort_key(collator, s, key) }
98
+
99
+ collator.sort(%w[bca aaa abc]).should == %w[aaa abc bca]
100
+ end
101
+
102
+ it 'sorts strings with equal sort keys by code points' do
103
+ [['aaa', [1, 2, 3]], ['abc', [1, 2, 3]], ['bca', [1, 2, 3]]].each { |s, key| mock_sort_key(collator, s, key) }
104
+
105
+ collator.sort(%w[bca abc aaa]).should == %w[aaa abc bca]
106
+ end
107
+ end
108
+
109
+ def mock_sort_key(collator, string, sort_key)
110
+ mock(collator).sort_key(TwitterCldr::Utils::CodePoints.from_string(string)) { sort_key }
111
+ end
112
+
113
+ def stub_sort_key(collator, string, sort_key)
114
+ stub(collator).sort_key(TwitterCldr::Utils::CodePoints.from_string(string)) { sort_key }
115
+ end
116
+
117
+ end
@@ -0,0 +1,24 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ include TwitterCldr::Collation
9
+
10
+ describe ImplicitCollationElements do
11
+
12
+ it 'computes correct implicit value for non-CJK code points' do
13
+ ImplicitCollationElements.for_code_point(0xD801).should == [[0xE305C758, 0x5, 0x5]]
14
+ ImplicitCollationElements.for_code_point(0xC0001).should == [[0xE44E70AC, 0x5, 0x5]]
15
+ ImplicitCollationElements.for_code_point(0xFFF02).should == [[0xE4C25F74, 0x5, 0x5]]
16
+ end
17
+
18
+ it 'computes correct implicit values for CJK code points' do
19
+ ImplicitCollationElements.for_code_point(0x4E00).should == [[0xE00406, 0x5, 0x5]]
20
+ ImplicitCollationElements.for_code_point(0x3400).should == [[0xE0ABCE, 0x5, 0x5]]
21
+ ImplicitCollationElements.for_code_point(0x20000).should == [[0xE1302590, 0x5, 0x5]]
22
+ end
23
+
24
+ end
@@ -0,0 +1,56 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ include TwitterCldr::Collation
9
+
10
+ describe SortKey do
11
+
12
+ let(:sort_key) { SortKey.new(collation_elements) }
13
+ let(:collation_elements) { [[63, 13, 149], [66, 81, 143]] }
14
+ let(:sort_key_bytes) { [63, 66, 1, 13, 81, 1, 149, 143] }
15
+
16
+ describe '.build' do
17
+ it 'returns a sort key for a given array of collation elements' do
18
+ sort_key = SortKey.new(collation_elements)
19
+
20
+ mock(SortKey).new(collation_elements) { sort_key }
21
+ mock(sort_key).bytes_array { sort_key_bytes }
22
+
23
+ SortKey.build(collation_elements).should == sort_key_bytes
24
+ end
25
+ end
26
+
27
+ describe '#initialize' do
28
+ it 'assigns collation elements array' do
29
+ SortKey.new(collation_elements).collation_elements.should == collation_elements
30
+ end
31
+ end
32
+
33
+ describe '#bytes_array' do
34
+ it 'builds sort key bytes' do
35
+ sort_key.bytes_array.should == sort_key_bytes
36
+ end
37
+
38
+ it 'builds bytes array only once' do
39
+ mock(sort_key).build_bytes_array { sort_key_bytes }
40
+ sort_key.bytes_array.object_id == sort_key.bytes_array.object_id
41
+ end
42
+
43
+ it 'compresses secondary weights' do
44
+ SortKey.new([[0, 5, 0], [0, 5, 0], [0, 141, 0], [0, 5, 0], [0, 5, 0]]).bytes_array.should == [1, 133, 141, 6, 1]
45
+ end
46
+
47
+ it 'compresses tertiary weights' do
48
+ SortKey.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]]).bytes_array.should == [1, 1, 132, 167, 6]
49
+ end
50
+
51
+ it 'compresses secondary and tertiary weights into multiple bytes if necessary' do
52
+ SortKey.new([[39, 5, 5]] * 100).bytes_array.should == [39] * 100 + [1, 69, 40, 1, 48, 48, 18]
53
+ end
54
+ end
55
+
56
+ end
@@ -0,0 +1,114 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ include TwitterCldr::Collation
9
+
10
+ describe TrieBuilder do
11
+
12
+ describe '#build' do
13
+ describe 'fractional CE trie hash' do
14
+ let(:trie_builder) do
15
+ builder = TrieBuilder.new('resource')
16
+ stub(builder).load_collation_elements_table { FRACTIONAL_UCA_SHORT_STUB }
17
+ builder
18
+ end
19
+
20
+ it 'returns a trie' do
21
+ trie_builder.is_a?(Trie)
22
+ end
23
+
24
+ it 'adds every collation element from the FractionalUCA_SHORT.txt file to the trie' do
25
+ mock(Trie).new { TrieStub.new }
26
+
27
+ trie_builder.build.storage.should == COLLATION_ELEMENTS_TABLE
28
+ end
29
+ end
30
+ end
31
+
32
+ end
33
+
34
+ class TrieStub
35
+ attr_accessor :storage
36
+
37
+ def initialize
38
+ self.storage = []
39
+ end
40
+
41
+ def add(code_points, collation_element)
42
+ storage << [code_points, collation_element]
43
+ end
44
+ end
45
+
46
+ FRACTIONAL_UCA_SHORT_STUB = <<END
47
+ # Fractional UCA Table, generated from standard UCA
48
+ # 2012-01-03, 21:52:55 GMT [MD]
49
+ # VERSION: UCA=6.1.0, UCD=6.1.0
50
+ # For a description of the format and usage, see CollationAuxiliary.html
51
+
52
+ [UCA version = 6.1.0]
53
+
54
+ 0000; [,,]
55
+ 030C; [, 97, 05]
56
+ 215E; [20, 05, 3B][0D 75 2C, 05, 3B][22, 05, 3D]
57
+ FC63; [, D3 A9, 33][, D5 11, 33]
58
+ 0E40 0E01; [72 0A, 05, 05][72 7E, 05, 3D]
59
+ 0E40 0E02; [72 0C, 05, 05][72 7E, 05, 3D]
60
+
61
+ # HOMELESS COLLATION ELEMENTS
62
+ FDD0 0063; [, 97, 3D]
63
+ FDD0 0064; [, A7, 09]
64
+
65
+ # SPECIAL MAX/MIN COLLATION ELEMENTS
66
+
67
+ FFFE; [02, 02, 02] # Special LOWEST primary, for merge/interleaving
68
+ FFFF; [EF FE, 05, 05] # Special HIGHEST primary, for ranges
69
+
70
+ # Top Byte => Reordering Tokens
71
+ [top_byte 00 TERMINATOR ] # [0] TERMINATOR=1
72
+ [top_byte 01 LEVEL-SEPARATOR ] # [0] LEVEL-SEPARATOR=1
73
+ [top_byte 02 FIELD-SEPARATOR ] # [0] FIELD-SEPARATOR=1
74
+ [top_byte 03 SPACE ] # [9] SPACE=1 Cc=6 Zl=1 Zp=1 Zs=1
75
+
76
+ # VALUES BASED ON UCA
77
+ [first tertiary ignorable [,,]] # CONSTRUCTED
78
+ [last tertiary ignorable [,,]] # CONSTRUCTED
79
+ # Warning: Case bits are masked in the following
80
+ [first tertiary in secondary non-ignorable [X, X, 05]] # U+0332 COMBINING LOW LINE
81
+ [last tertiary in secondary non-ignorable [X, X, 3D]] # U+2A74 DOUBLE COLON EQUAL
82
+ END
83
+
84
+ COLLATION_ELEMENTS_TABLE = [
85
+ # 0000; [,,]
86
+ [[0], [[0, 0, 0]]],
87
+
88
+ # 030C; [, 97, 05]
89
+ [[780], [[0, 151, 5]]],
90
+
91
+ # 215E; [20, 05, 3B][0D 75 2C, 05, 3B][22, 05, 3D]
92
+ [[8542], [[32, 5, 59], [881964, 5, 59], [34, 5, 61]]],
93
+
94
+ # FC63; [, D3 A9, 33][, D5 11, 33]
95
+ [[64611], [[0, 54185, 51], [0, 54545, 51]]],
96
+
97
+ # 0E40 0E01; [72 0A, 05, 05][72 7E, 05, 3D]
98
+ [[3648, 3585], [[29194, 5, 5], [29310, 5, 61]]],
99
+
100
+ # 0E40 0E02; [72 0C, 05, 05][72 7E, 05, 3D]
101
+ [[3648, 3586], [[29196, 5, 5], [29310, 5, 61]]],
102
+
103
+ # FDD0 0063; [, 97, 3D]
104
+ [[64976, 99], [[0, 151, 61]]],
105
+
106
+ # FDD0 0064; [, A7, 09]
107
+ [[64976, 100], [[0, 167, 9]]],
108
+
109
+ # FFFE; [02, 02, 02]
110
+ [[65534], [[2, 2, 2]]],
111
+
112
+ # FFFF; [EF FE, 05, 05]
113
+ [[65535], [[61438, 5, 5]]]
114
+ ]
@@ -0,0 +1,97 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ include TwitterCldr::Collation
9
+
10
+ describe Trie do
11
+
12
+ let(:trie) { Trie.new }
13
+
14
+ let(:values) do
15
+ [
16
+ [[1], '1' ],
17
+ [[1, 4], '14' ],
18
+ [[1, 5], '15' ],
19
+ [[1, 4, 8], '148'],
20
+ [[2], '2' ],
21
+ [[2, 7, 5], '275'],
22
+ [[3, 9], '39' ]
23
+ ]
24
+ end
25
+
26
+ before(:each) do
27
+ values.each { |key, value| trie.add(key, value) }
28
+ end
29
+
30
+ describe '#get' do
31
+ it 'returns nil for non existing keys' do
32
+ [[6], [3], [1, 4, 3], [2, 7, 5, 6, 9]].each { |key| trie.get(key).should be_nil }
33
+ end
34
+
35
+ it 'returns value and key size for each existing key' do
36
+ values.each { |key, value| trie.get(key).should == value }
37
+ end
38
+ end
39
+
40
+ describe '#add' do
41
+ it 'overrides values' do
42
+ trie.get([1, 4]).should == '14'
43
+
44
+ trie.add([1, 4], '14-new')
45
+ trie.get([1, 4]).should == '14-new'
46
+ end
47
+ end
48
+
49
+ describe '#find_prefix' do
50
+ describe 'first (value) and third (prefix size) elements of the returned array' do
51
+ it 'value is 0 nil and prefix size is 0 if the prefix was not found' do
52
+ test_find_prefix(trie, [4], nil, 0)
53
+ end
54
+
55
+ it 'stored value and key size as a prefix size if the whole key was found' do
56
+ values.each do |key, value|
57
+ test_find_prefix(trie, key, value)
58
+ end
59
+ end
60
+
61
+ it 'stored value and size of the corresponding prefix if only part of the key was found' do
62
+ tests = {
63
+ [1, 9] => ['1', 1],
64
+ [1, 4, 2] => ['14', 2],
65
+ [1, 4, 8, 9, 2] => ['148', 3],
66
+ [2, 7, 5, 5] => ['275', 3]
67
+ }
68
+
69
+ tests.each { |key, result| test_find_prefix(trie, key, *result) }
70
+ end
71
+
72
+ def test_find_prefix(trie, key, value, size = key.size)
73
+ result = trie.find_prefix(key)
74
+
75
+ result[0].should == value
76
+ result[2].should == size
77
+ end
78
+ end
79
+
80
+ describe 'second (subtrie) element of the returned array' do
81
+ it 'is a hash of possible suffixes for the prefix that was found' do
82
+ trie.find_prefix([1, 4, 8])[1].should == {}
83
+ trie.find_prefix([2, 7])[1].should == { 5 => ["275", { }] }
84
+ end
85
+
86
+ it 'is a hash representing the whole trie if the prefix was not found' do
87
+ trie.find_prefix([404])[1].should == {
88
+ 1 => ['1', { 4 => ['14', { 8 => ['148', {}] }], 5 => ['15', {}] }],
89
+ 2 => ['2', { 7 => [nil, { 5 => ['275', {}] }] }],
90
+ 3 => [nil, { 9 => ['39', {}] }]
91
+ }
92
+ end
93
+ end
94
+
95
+ end
96
+
97
+ end
@@ -35,6 +35,11 @@ describe DateTime do
35
35
  loc_date.to_date.calendar_type.should == :buddhist
36
36
  loc_date.to_time.calendar_type.should == :buddhist
37
37
  end
38
+
39
+ it "should default to English if the given locale isn't supported" do
40
+ loc_date = date.localize(:xx)
41
+ loc_date.locale.should == :en
42
+ end
38
43
  end
39
44
 
40
45
  describe "stringify" do
@@ -0,0 +1,34 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ include TwitterCldr::Tokenizers
9
+
10
+ describe "Calendars" do
11
+ it "makes sure datetime formatters for every locale don't raise errors" do
12
+ TwitterCldr.supported_locales.each do |locale|
13
+ DateTimeTokenizer::VALID_TYPES.each do |type|
14
+ lambda { DateTime.now.localize(locale).send(:"to_#{type}_s") }.should_not raise_error
15
+ end
16
+ end
17
+ end
18
+
19
+ it "makes sure date formatters for every locale don't raise errors" do
20
+ TwitterCldr.supported_locales.each do |locale|
21
+ DateTimeTokenizer::VALID_TYPES.each do |type|
22
+ lambda { Date.today.localize(locale).send(:"to_#{type}_s") }.should_not raise_error
23
+ end
24
+ end
25
+ end
26
+
27
+ it "makes sure time formatters for every locale don't raise errors" do
28
+ TwitterCldr.supported_locales.each do |locale|
29
+ DateTimeTokenizer::VALID_TYPES.each do |type|
30
+ lambda { Time.now.localize(locale).send(:"to_#{type}_s") }.should_not raise_error
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,39 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ include TwitterCldr
9
+
10
+ describe "Numbers" do
11
+ it "makes sure currency formatters for every locale don't raise errors" do
12
+ TwitterCldr.supported_locales.each do |locale|
13
+ lambda { 1337.localize(locale).to_currency.to_s }.should_not raise_error
14
+ lambda { 1337.localize(locale).to_currency.to_s(:precision => 3) }.should_not raise_error
15
+ lambda { 1337.localize(locale).to_currency.to_s(:precision => 3, :currency => "EUR") }.should_not raise_error
16
+ end
17
+ end
18
+
19
+ it "makes sure decimal formatters for every locale don't raise errors" do
20
+ TwitterCldr.supported_locales.each do |locale|
21
+ lambda { 1337.localize(locale).to_decimal.to_s }.should_not raise_error
22
+ lambda { 1337.localize(locale).to_decimal.to_s(:precision => 3) }.should_not raise_error
23
+ end
24
+ end
25
+
26
+ it "makes sure percentage formatters for every locale don't raise errors" do
27
+ TwitterCldr.supported_locales.each do |locale|
28
+ lambda { 1337.localize(locale).to_percent.to_s }.should_not raise_error
29
+ lambda { 1337.localize(locale).to_percent.to_s(:precision => 3) }.should_not raise_error
30
+ end
31
+ end
32
+
33
+ it "makes sure basic number formatters for every locale don't raise errors" do
34
+ TwitterCldr.supported_locales.each do |locale|
35
+ lambda { 1337.localize(locale).to_s }.should_not raise_error
36
+ lambda { 1337.localize(locale).to_s(:precision => 3) }.should_not raise_error
37
+ end
38
+ end
39
+ end
@@ -15,8 +15,8 @@ describe String do
15
15
  end
16
16
 
17
17
  it "uses default locale if it's not explicitly specified" do
18
- mock(TwitterCldr).get_locale { :jp }
19
- 'foo'.localize.locale.should == :jp
18
+ mock(TwitterCldr).get_locale { :ja }
19
+ 'foo'.localize.locale.should == :ja
20
20
  end
21
21
 
22
22
  it 'uses provided locale if there is one' do
@@ -111,7 +111,7 @@ describe LocalizedString do
111
111
 
112
112
  describe "#normalize" do
113
113
  it "returns a normalized instance of LocalizedString, defaults to NFD" do
114
- mock.proxy(TwitterCldr::Normalizers::NFD).normalize("español")
114
+ mock.proxy(TwitterCldr::Normalization::NFD).normalize("español")
115
115
  "español".bytes.to_a.should == [101, 115, 112, 97, 195, 177, 111, 108]
116
116
  result = "español".localize.normalize
117
117
  result.should be_a(LocalizedString)
@@ -119,7 +119,7 @@ describe LocalizedString do
119
119
  end
120
120
 
121
121
  it "returns a normalized instance of LocalizedString using the specified algorithm" do
122
- mock.proxy(TwitterCldr::Normalizers::NFKD).normalize("español")
122
+ mock.proxy(TwitterCldr::Normalization::NFKD).normalize("español")
123
123
  "español".bytes.to_a.should == [101, 115, 112, 97, 195, 177, 111, 108]
124
124
  result = "español".localize.normalize(:using => :NFKD)
125
125
  result.should be_a(LocalizedString)