twitter_cldr_js 2.3.2 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (153) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -5
  3. data/History.txt +8 -0
  4. data/README.md +69 -1
  5. data/Rakefile +0 -9
  6. data/lib/assets/javascripts/twitter_cldr/af.js +2042 -142
  7. data/lib/assets/javascripts/twitter_cldr/ar.js +2043 -143
  8. data/lib/assets/javascripts/twitter_cldr/be.js +2044 -144
  9. data/lib/assets/javascripts/twitter_cldr/bg.js +2042 -142
  10. data/lib/assets/javascripts/twitter_cldr/bn.js +2042 -142
  11. data/lib/assets/javascripts/twitter_cldr/ca.js +2042 -142
  12. data/lib/assets/javascripts/twitter_cldr/cs.js +2043 -143
  13. data/lib/assets/javascripts/twitter_cldr/cy.js +2043 -143
  14. data/lib/assets/javascripts/twitter_cldr/da.js +2042 -142
  15. data/lib/assets/javascripts/twitter_cldr/de-CH.js +2042 -142
  16. data/lib/assets/javascripts/twitter_cldr/de.js +2042 -142
  17. data/lib/assets/javascripts/twitter_cldr/el.js +2042 -142
  18. data/lib/assets/javascripts/twitter_cldr/en-150.js +2042 -142
  19. data/lib/assets/javascripts/twitter_cldr/en-AU.js +2042 -142
  20. data/lib/assets/javascripts/twitter_cldr/en-CA.js +2042 -142
  21. data/lib/assets/javascripts/twitter_cldr/en-GB.js +2042 -142
  22. data/lib/assets/javascripts/twitter_cldr/en-IE.js +2042 -142
  23. data/lib/assets/javascripts/twitter_cldr/en-SG.js +2042 -142
  24. data/lib/assets/javascripts/twitter_cldr/en-ZA.js +2042 -142
  25. data/lib/assets/javascripts/twitter_cldr/en.js +2042 -142
  26. data/lib/assets/javascripts/twitter_cldr/es-419.js +2042 -142
  27. data/lib/assets/javascripts/twitter_cldr/es-CO.js +2042 -142
  28. data/lib/assets/javascripts/twitter_cldr/es-MX.js +2044 -144
  29. data/lib/assets/javascripts/twitter_cldr/es-US.js +2043 -143
  30. data/lib/assets/javascripts/twitter_cldr/es.js +2043 -143
  31. data/lib/assets/javascripts/twitter_cldr/eu.js +2042 -142
  32. data/lib/assets/javascripts/twitter_cldr/fa.js +2042 -142
  33. data/lib/assets/javascripts/twitter_cldr/fi.js +2042 -142
  34. data/lib/assets/javascripts/twitter_cldr/fil.js +2043 -143
  35. data/lib/assets/javascripts/twitter_cldr/fr-BE.js +2042 -142
  36. data/lib/assets/javascripts/twitter_cldr/fr-CA.js +2042 -142
  37. data/lib/assets/javascripts/twitter_cldr/fr-CH.js +2043 -143
  38. data/lib/assets/javascripts/twitter_cldr/fr.js +2042 -142
  39. data/lib/assets/javascripts/twitter_cldr/ga.js +2044 -144
  40. data/lib/assets/javascripts/twitter_cldr/gl.js +2042 -142
  41. data/lib/assets/javascripts/twitter_cldr/he.js +2043 -143
  42. data/lib/assets/javascripts/twitter_cldr/hi.js +2042 -142
  43. data/lib/assets/javascripts/twitter_cldr/hr.js +2043 -143
  44. data/lib/assets/javascripts/twitter_cldr/hu.js +2042 -142
  45. data/lib/assets/javascripts/twitter_cldr/id.js +2042 -142
  46. data/lib/assets/javascripts/twitter_cldr/is.js +2042 -142
  47. data/lib/assets/javascripts/twitter_cldr/it-CH.js +2043 -143
  48. data/lib/assets/javascripts/twitter_cldr/it.js +2043 -143
  49. data/lib/assets/javascripts/twitter_cldr/ja.js +2042 -142
  50. data/lib/assets/javascripts/twitter_cldr/ko.js +2042 -142
  51. data/lib/assets/javascripts/twitter_cldr/lv.js +2043 -143
  52. data/lib/assets/javascripts/twitter_cldr/msa.js +2043 -143
  53. data/lib/assets/javascripts/twitter_cldr/nl.js +2042 -142
  54. data/lib/assets/javascripts/twitter_cldr/no.js +2042 -142
  55. data/lib/assets/javascripts/twitter_cldr/pl.js +2043 -143
  56. data/lib/assets/javascripts/twitter_cldr/pt.js +2042 -142
  57. data/lib/assets/javascripts/twitter_cldr/ro.js +2043 -143
  58. data/lib/assets/javascripts/twitter_cldr/ru.js +2043 -143
  59. data/lib/assets/javascripts/twitter_cldr/sk.js +2043 -143
  60. data/lib/assets/javascripts/twitter_cldr/sq.js +2042 -142
  61. data/lib/assets/javascripts/twitter_cldr/sr.js +2043 -143
  62. data/lib/assets/javascripts/twitter_cldr/sv.js +2042 -142
  63. data/lib/assets/javascripts/twitter_cldr/ta.js +2042 -142
  64. data/lib/assets/javascripts/twitter_cldr/th.js +2042 -142
  65. data/lib/assets/javascripts/twitter_cldr/tr.js +2042 -142
  66. data/lib/assets/javascripts/twitter_cldr/uk.js +2044 -144
  67. data/lib/assets/javascripts/twitter_cldr/ur.js +2042 -142
  68. data/lib/assets/javascripts/twitter_cldr/vi.js +2042 -142
  69. data/lib/assets/javascripts/twitter_cldr/zh-cn.js +2042 -142
  70. data/lib/assets/javascripts/twitter_cldr/zh-tw.js +2042 -142
  71. data/lib/twitter_cldr/js/compiler.rb +26 -5
  72. data/lib/twitter_cldr/js/mustache/calendars/datetime.coffee +1 -4
  73. data/lib/twitter_cldr/js/mustache/numbers/numbers.coffee +10 -4
  74. data/lib/twitter_cldr/js/mustache/parsers/parser.coffee +32 -0
  75. data/lib/twitter_cldr/js/mustache/parsers/segmentation_parser.coffee +89 -0
  76. data/lib/twitter_cldr/js/mustache/parsers/symbol_table.coffee +14 -0
  77. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_class.coffee +51 -0
  78. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_range.coffee +19 -0
  79. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_set.coffee +36 -0
  80. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/component.coffee +48 -0
  81. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/literal.coffee +44 -0
  82. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/unicode_string.coffee +23 -0
  83. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex_parser.coffee +189 -0
  84. data/lib/twitter_cldr/js/mustache/plurals/rules.coffee +7 -5
  85. data/lib/twitter_cldr/js/mustache/shared/break_iterator.coffee +148 -0
  86. data/lib/twitter_cldr/js/mustache/shared/code_point.coffee +121 -0
  87. data/lib/twitter_cldr/js/mustache/shared/unicode_regex.coffee +41 -0
  88. data/lib/twitter_cldr/js/mustache/tokenizers/composite_token.coffee +11 -0
  89. data/lib/twitter_cldr/js/mustache/tokenizers/segmentation_tokenizer.coffee +24 -0
  90. data/lib/twitter_cldr/js/mustache/tokenizers/token.coffee +14 -0
  91. data/lib/twitter_cldr/js/mustache/tokenizers/tokenizer.coffee +83 -0
  92. data/lib/twitter_cldr/js/mustache/tokenizers/unicode_regex/unicode_regex_tokenizer.coffee +39 -0
  93. data/lib/twitter_cldr/js/mustache/utilities.coffee +45 -0
  94. data/lib/twitter_cldr/js/mustache/utils/code_points.coffee +23 -0
  95. data/lib/twitter_cldr/js/mustache/utils/range.coffee +16 -0
  96. data/lib/twitter_cldr/js/mustache/utils/range_set.coffee +195 -0
  97. data/lib/twitter_cldr/js/renderers.rb +39 -10
  98. data/lib/twitter_cldr/js/renderers/calendars/timespan_renderer.rb +1 -1
  99. data/lib/twitter_cldr/js/renderers/numbers/numbers_renderer.rb +16 -9
  100. data/lib/twitter_cldr/js/renderers/parsers/parser.rb +18 -0
  101. data/lib/twitter_cldr/js/renderers/parsers/segmentation_parser.rb +18 -0
  102. data/lib/twitter_cldr/js/renderers/parsers/symbol_table.rb +18 -0
  103. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_class.rb +18 -0
  104. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_range.rb +18 -0
  105. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_set.rb +18 -0
  106. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/component.rb +18 -0
  107. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/literal.rb +18 -0
  108. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/unicode_string.rb +18 -0
  109. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex_parser.rb +18 -0
  110. data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_renderer.rb +27 -28
  111. data/lib/twitter_cldr/js/renderers/shared/break_iterator_renderer.rb +50 -0
  112. data/lib/twitter_cldr/js/renderers/shared/code_point_renderer.rb +103 -0
  113. data/lib/twitter_cldr/js/renderers/shared/unicode_regex_renderer.rb +18 -0
  114. data/lib/twitter_cldr/js/renderers/tokenizers/composite_token.rb +18 -0
  115. data/lib/twitter_cldr/js/renderers/tokenizers/segmentation_tokenizer.rb +18 -0
  116. data/lib/twitter_cldr/js/renderers/tokenizers/token.rb +18 -0
  117. data/lib/twitter_cldr/js/renderers/tokenizers/tokenizer.rb +18 -0
  118. data/lib/twitter_cldr/js/renderers/tokenizers/unicode_regex/unicode_regex_tokenizer.rb +18 -0
  119. data/lib/twitter_cldr/js/renderers/utils/code_points.rb +18 -0
  120. data/lib/twitter_cldr/js/renderers/utils/range.rb +18 -0
  121. data/lib/twitter_cldr/js/renderers/utils/range_set.rb +18 -0
  122. data/lib/twitter_cldr/js/tasks/tasks.rb +1 -1
  123. data/lib/twitter_cldr/js/version.rb +1 -1
  124. data/spec/js/calendars/datetime.ru.spec.js +17 -0
  125. data/spec/js/calendars/timespan.ru.spec.js +20 -0
  126. data/spec/js/numbers/abbreviated/abbreviated_number.spec.js +5 -5
  127. data/spec/js/numbers/abbreviated/long_decimal.ru.spec.js +24 -0
  128. data/spec/js/numbers/currency.spec.js +1 -1
  129. data/spec/js/parsers/parser.spec.js +74 -0
  130. data/spec/js/parsers/segmentation_parser.spec.js +67 -0
  131. data/spec/js/parsers/symbol_table.spec.js +20 -0
  132. data/spec/js/parsers/unicode_regex/character_class.spec.js +121 -0
  133. data/spec/js/parsers/unicode_regex/character_range.spec.js +17 -0
  134. data/spec/js/parsers/unicode_regex/character_set.spec.js +17 -0
  135. data/spec/js/parsers/unicode_regex/literal.spec.js +30 -0
  136. data/spec/js/parsers/unicode_regex/unicode_string.spec.js +17 -0
  137. data/spec/js/parsers/unicode_regex_parser.spec.js +76 -0
  138. data/spec/js/plurals/plural_rules.spec.js +21 -0
  139. data/spec/js/shared/break_iterator.spec.js +68 -0
  140. data/spec/js/shared/code_point.spec.js +89 -0
  141. data/spec/js/shared/unicode_regex.spec.js +201 -0
  142. data/spec/js/tokenizers/composite_token.spec.js +28 -0
  143. data/spec/js/tokenizers/segmentation_tokenizer.spec.js +22 -0
  144. data/spec/js/tokenizers/token.spec.js +25 -0
  145. data/spec/js/tokenizers/unicode_regex/unicode_regex_tokenizer.spec.js +163 -0
  146. data/spec/js/utilities.spec.js +47 -0
  147. data/spec/js/utils/code_points.spec.js +49 -0
  148. data/spec/js/utils/range_set.spec.js +248 -0
  149. data/twitter_cldr_js.gemspec +8 -6
  150. metadata +128 -34
  151. data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_compiler.rb +0 -93
  152. data/spec/ruby/renderers/plurals/plural_rules_compiler_spec.rb +0 -56
  153. data/spec/ruby/spec_helper.rb +0 -11
@@ -0,0 +1,89 @@
1
+ // Copyright 2012 Twitter, Inc
2
+ // http://www.apache.org/licenses/LICENSE-2.0
3
+
4
+ var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
5
+
6
// Specs for TwitterCldr.CodePoint: how the constructor interprets the
// decomposition field, and the code_points_for_property class-level lookup.
describe("CodePoint", function() {
  // Replace both class-level caches with empty objects so that no spec can
  // observe state left behind by another one.
  var clear_cache = function() {
    TwitterCldr.CodePoint.composition_exclusion_cache = {};
    TwitterCldr.CodePoint.block_cache = {};
  };

  // Builds a CodePoint from the 15-element field list shared by every spec
  // below; only the decomposition field (index 5) differs between them.
  var build_code_point = function(decomposition) {
    var unicode_data = ['17D1', 'KHMER SIGN VIRIAM', 'Mn', '0', 'NSM', decomposition, "", "", "", 'N', "", "", "", "", ""];
    return new TwitterCldr.CodePoint(unicode_data);
  };

  beforeEach(function() {
    clear_cache();
  });

  afterEach(function() {
    clear_cache();
  });

  describe("#initialize", function() {

    describe("when decomposition is canonical", function() {
      var code_point = build_code_point('0028 007A 0029');

      it("parses decomposition mapping", function() {
        expect(code_point.decomposition()).toEqual([0x28, 0x7A, 0x29]);
      });

      it("initializes compatibility tag as nil", function() {
        expect(code_point.compatibility_decomposition_tag()).toBe(null);
      });

      it("returns false from is_compatibility_decomposition", function() {
        expect(code_point.is_compatibility_decomposition()).toBe(false);
      });
    });

    describe("when decomposition is compatibility", function() {
      // The leading "<font>" is the tag; the hex values after it are the mapping.
      var code_point = build_code_point('<font> 0028 007A 0029');

      it("parses decomposition mapping", function() {
        expect(code_point.decomposition()).toEqual([0x28, 0x7A, 0x29]);
      });

      it("initializes compatibility decomposition tag", function() {
        expect(code_point.compatibility_decomposition_tag()).toEqual('font');
      });

      it("returns true from is_compatibility_decomposition", function() {
        expect(code_point.is_compatibility_decomposition()).toBe(true);
      });
    });

    describe("when decomposition is empty", function() {
      var code_point = build_code_point("");

      it("parses decomposition mapping", function() {
        expect(code_point.decomposition()).toBe(null);
      });

      it("initializes compatibility tag as nil", function() {
        expect(code_point.compatibility_decomposition_tag()).toBe(null);
      });

      it("returns false from is_compatibility_decomposition", function() {
        expect(code_point.is_compatibility_decomposition()).toBe(false);
      });
    });
  });

  describe("#code_points_for_property", function() {
    // NOTE(review): toEqualRange is not registered anywhere in this spec file;
    // presumably another spec installs it in a global beforeEach - confirm.
    it("returns code points for the given unicode property and value", function() {
      // Declared locally: the bare assignment used before created a global.
      var cps = TwitterCldr.CodePoint.code_points_for_property("line_break", "CM");
      expect(cps instanceof Array).toBe(true);
      expect(cps[0]).toEqualRange(new TwitterCldr.Range(0, 8));

      cps = TwitterCldr.CodePoint.code_points_for_property("sentence_break", "Extend");
      expect(cps instanceof Array).toBe(true);
      expect(cps[0]).toEqualRange(new TwitterCldr.Range(768, 879));

      cps = TwitterCldr.CodePoint.code_points_for_property("word_break", "Hebrew_Letter");
      expect(cps instanceof Array).toBe(true);
      expect(cps[0]).toEqualRange(new TwitterCldr.Range(1488, 1514));
    });
  });
});
@@ -0,0 +1,201 @@
1
+ // Copyright 2012 Twitter, Inc
2
+ // http://www.apache.org/licenses/LICENSE-2.0
3
+
4
+ var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
5
+
6
+ beforeEach(function() {
7
// Custom matcher: passes when the whole actual string (this.actual) is
// matched by the given TwitterCldr.UnicodeRegex.
//
// expected - the TwitterCldr.UnicodeRegex to match against; anything else
//            makes the matcher fail.
// Returns true only for a full-string match, false otherwise.
var toMatchUnicodeRegexExactly = function(expected) {
  if (!(expected instanceof TwitterCldr.UnicodeRegex)) {
    return false;
  }
  // Only strings can match; fail instead of throwing on other actual values.
  if (typeof this.actual !== "string") {
    return false;
  }
  // Anchor the compiled source rather than comparing the first match with the
  // input: with alternations (e.g. "a|ab" against "ab") the leftmost match can
  // be a shorter prefix even though the regex can match the whole string.
  var anchored = new RegExp("^(?:" + expected.to_regexp_str() + ")$");
  return anchored.test(this.actual);
};
13
+ this.addMatchers({
14
+ toMatchUnicodeRegexExactly : toMatchUnicodeRegexExactly,
15
+ });
16
+ });
17
// Specs for TwitterCldr.UnicodeRegex: compiling a pattern into a JavaScript
// regex string, substituting $VARIABLES from a SymbolTable, and matching.
describe("UnicodeRegex", function() {
  // Shorthand for UnicodeRegex.compile that always passes an empty string as
  // the second argument and forwards the optional symbol table.
  var compile = function(str, symbol_table) {
    return TwitterCldr.UnicodeRegex.compile(str, "", symbol_table);
  };
  var tokenizer = new TwitterCldr.UnicodeRegexTokenizer();
  // Variables available to the substitution specs below.
  var symbol_table = new TwitterCldr.SymbolTable({
    "$FOO" : tokenizer.tokenize("[g-k]"),
    "$BAR" : tokenizer.tokenize("[p-s]")
  });

  describe("basic operations", function() {
    var regex = compile("[abc]");

    describe("#compile", function() {
      it("should return a UnicodeRegex, parsed and ready to go", function() {
        expect(regex instanceof TwitterCldr.UnicodeRegex).toBe(true);
      });
    });

    describe("#to_regexp_str", function() {
      it("should return the string representation of this regex", function() {
        expect(regex.to_regexp_str()).toEqual("(?:[\\u0061-\\u0063])");
      });
    });

    describe("#to_regexp", function() {
      it("should return a Javascript Regexp", function() {
        expect(regex.to_regexp() instanceof RegExp).toBe(true);
      });

      it("should properly turn various basic regexes into strings", function() {
        expect(compile("^abc$").to_regexp_str()).toEqual("^(?:\\u0061)(?:\\u0062)(?:\\u0063)$");
        expect(compile("a(b)c").to_regexp_str()).toEqual("(?:\\u0061)((?:\\u0062))(?:\\u0063)");
        expect(compile("a(?:b)c").to_regexp_str()).toEqual("(?:\\u0061)(?:(?:\\u0062))(?:\\u0063)");
        expect(compile("a{1,3}").to_regexp_str()).toEqual("(?:\\u0061){1,3}");
        expect(compile("[abc]").to_regexp_str()).toEqual("(?:[\\u0061-\\u0063])");
      });

      it("should properly turn various complex regexes into strings", function() {
        expect(compile("[a-z0-9]").to_regexp_str()).toEqual("(?:[\\u0030-\\u0039]|[\\u0061-\\u007a])");
        expect(compile("[\\u0067-\\u0071]").to_regexp_str()).toEqual("(?:[\\u0067-\\u0071])");
      });

      it("should properly substitute variables", function() {
        expect(compile("$FOO$BAR", symbol_table).to_regexp_str()).toEqual("(?:[\\u0067-\\u006b])(?:[\\u0070-\\u0073])");
      });
    });
  });

  describe("with a few variables", function() {
    describe("#match", function() {
      it("should substitute variables from the symbol_table", function() {
        var regex = compile("$FOO $BAR", symbol_table);
        expect("h r").toMatchUnicodeRegexExactly(regex);
        expect("j q").toMatchUnicodeRegexExactly(regex);
        expect("h t").not.toMatchUnicodeRegexExactly(regex);
        expect("c s").not.toMatchUnicodeRegexExactly(regex);
      });
    });
  });

  // This group previously carried the title of one of its own specs
  // ("should match a regex with a capturing group"); it covers all the
  // simple patterns, mirroring the "complex character classes" group below.
  describe("matching basic regexes", function() {
    describe("#match", function() {
      it("should match a regex with no char class", function() {
        var regex = compile("^abc$");
        expect("abc").toMatchUnicodeRegexExactly(regex);
        expect("cba").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex with a capturing group", function() {
        var regex = compile("a(b)c");
        var match = regex.match("abc");
        expect(match).not.toBe(null);
        expect(match[1]).toEqual("b");
      });

      it("should match a regex with a non-capturing group", function() {
        var regex = compile("a(?:b)c");
        var match = regex.match("abc");
        expect(match).not.toBe(null);
        // Only the overall match is present: the group captured nothing.
        expect(match.length).toEqual(1);
      });

      it("should match a regex with a quantifier", function() {
        var regex = compile("a{1,3}");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect("aa").toMatchUnicodeRegexExactly(regex);
        expect("aaa").toMatchUnicodeRegexExactly(regex);
        expect("aaaa").not.toMatchUnicodeRegexExactly(regex);
        expect("b").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex with a basic char class", function() {
        var regex = compile("[abc]");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect("b").toMatchUnicodeRegexExactly(regex);
        expect("c").toMatchUnicodeRegexExactly(regex);
        expect("ab").not.toMatchUnicodeRegexExactly(regex);
        expect("d").not.toMatchUnicodeRegexExactly(regex);
      });
    });
  });

  describe("matching complex character classes", function() {
    describe("#match", function() {
      it("should match a regex with a char class containing a range", function() {
        var regex = compile("[a-z0-9]");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect("m").toMatchUnicodeRegexExactly(regex);
        expect("z").toMatchUnicodeRegexExactly(regex);
        expect("0").toMatchUnicodeRegexExactly(regex);
        expect("3").toMatchUnicodeRegexExactly(regex);
        expect("9").toMatchUnicodeRegexExactly(regex);
        expect("a0").not.toMatchUnicodeRegexExactly(regex);
        expect("m4").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex with a char class containing a unicode range", function() {
        var regex = compile("[\\u0067-\\u0071]"); // g-q
        expect("g").toMatchUnicodeRegexExactly(regex);
        expect("q").toMatchUnicodeRegexExactly(regex);
        expect("h").toMatchUnicodeRegexExactly(regex);
        expect("z").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex containing a character set", function() {
        var regex = compile("[\\p{Zs}]");
        expect(TwitterCldr.Utilities.pack_array([160])).toMatchUnicodeRegexExactly(regex); // non-breaking space
        expect(TwitterCldr.Utilities.pack_array([5760])).toMatchUnicodeRegexExactly(regex); // ogham space mark
        expect("a").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex containing a negated character set", function() {
        var regex = compile("[\\P{Zs}]");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect(TwitterCldr.Utilities.pack_array([160])).not.toMatchUnicodeRegexExactly(regex);
        expect(TwitterCldr.Utilities.pack_array([5760])).not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex containing a character set (alternate syntax)", function() {
        var regex = compile("[[:Zs:]]");
        expect(TwitterCldr.Utilities.pack_array([160])).toMatchUnicodeRegexExactly(regex); // non-breaking space
        expect(TwitterCldr.Utilities.pack_array([5760])).toMatchUnicodeRegexExactly(regex); // ogham space mark
        expect("a").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex containing a negated character set (alternate syntax)", function() {
        var regex = compile("[[:^Zs:]]");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect(TwitterCldr.Utilities.pack_array([160])).not.toMatchUnicodeRegexExactly(regex);
        expect(TwitterCldr.Utilities.pack_array([5760])).not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex with a character set and some quantifiers", function() {
        var regex = compile("[\\u0067-\\u0071]+");
        expect("gg").toMatchUnicodeRegexExactly(regex);
        expect("gh").toMatchUnicodeRegexExactly(regex);
        expect("qjk").toMatchUnicodeRegexExactly(regex);
        expect("").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex that uses special switches inside the char class", function() {
        var regex = compile("[\\w]+");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect("abc").toMatchUnicodeRegexExactly(regex);
        expect("a0b_1c2").toMatchUnicodeRegexExactly(regex);
        expect("$@#").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex that uses negated special switches inside the char class", function() {
        var regex = compile("[\\W]+");
        expect("a").not.toMatchUnicodeRegexExactly(regex);
        expect("abc").not.toMatchUnicodeRegexExactly(regex);
        expect("a0b_1c2").not.toMatchUnicodeRegexExactly(regex);
        expect("$@#").toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex with a complicated expression inside the char class", function() {
        // not [separators U space-tilde] diff [letters diff numbers] (diff is commutative)
        var regex = compile("[^[\\p{Z}\\u0020-\\u007f]-[\\p{L}]-[\\p{N}]]");
        expect(" ").toMatchUnicodeRegexExactly(regex);
        expect(",").toMatchUnicodeRegexExactly(regex);
        expect("a").not.toMatchUnicodeRegexExactly(regex);
      });
    });
  });
});
@@ -0,0 +1,28 @@
1
+ // Copyright 2012 Twitter, Inc
2
+ // http://www.apache.org/licenses/LICENSE-2.0
3
+
4
+ var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
5
+
6
// Specs for TwitterCldr.CompositeToken. The suite was previously labelled
// "Token", which collided with the Token suite and mislabelled its results.
describe("CompositeToken", function() {
  // Builds a CompositeToken wrapping two Tokens with distinct types and values.
  // Everything is declared locally; the bare assignments used before created
  // globals shared between specs.
  var build_composite_token = function() {
    var token_0 = new TwitterCldr.Token({"type":"my_type_0", "value":"my_value_0"});
    var token_1 = new TwitterCldr.Token({"type":"my_type_1", "value":"my_value_1"});

    return new TwitterCldr.CompositeToken([token_0, token_1]);
  };

  describe("#constructor", function() {
    it("should set an array of tokens", function() {
      var composite_token = build_composite_token();

      expect(composite_token.tokens.map(function(token){return token.type;})).toEqual(["my_type_0", "my_type_1"]);
      expect(composite_token.tokens.map(function(token){return token.value;})).toEqual(["my_value_0", "my_value_1"]);
    });
  });

  describe("#to_string", function() {
    it("should return the content", function() {
      var composite_token = build_composite_token();

      // The expected string is the two wrapped token values joined with no separator.
      expect(composite_token.to_string()).toEqual("my_value_0my_value_1");
    });
  });
});
@@ -0,0 +1,22 @@
1
+ // Copyright 2012 Twitter, Inc
2
+ // http://www.apache.org/licenses/LICENSE-2.0
3
+
4
+ var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
5
+
6
// Specs for TwitterCldr.SegmentationTokenizer: the break (÷) and no-break (×)
// operators between variables.
// NOTE(review): toEqualTokenList is not registered in this spec file;
// presumably a global beforeEach in another spec installs it - confirm.
describe("SegmentationTokenizer", function() {
  var tokenizer = new TwitterCldr.SegmentationTokenizer();

  // Previously titled "... with a non-break", duplicating the spec below even
  // though this one exercises the break operator.
  it("should tokenize an expression with a break", function() {
    expect(tokenizer.tokenize("$CB ÷ $SP")).toEqualTokenList([
      { 'value' : "$CB", 'type' : "variable" },
      { 'value' : "÷", 'type' : "break" },
      { 'value' : "$SP", 'type' : "variable" }
    ]);
  });

  it("should tokenize an expression with a non-break", function() {
    expect(tokenizer.tokenize("$ATerm × $Numeric")).toEqualTokenList([
      { 'value' : "$ATerm", 'type' : "variable" },
      { 'value' : "×", 'type' : "no_break" },
      { 'value' : "$Numeric", 'type' : "variable" }
    ]);
  });
});
@@ -0,0 +1,25 @@
1
+ // Copyright 2012 Twitter, Inc
2
+ // http://www.apache.org/licenses/LICENSE-2.0
3
+
4
+ var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
5
+
6
// Specs for TwitterCldr.Token: construction from an options hash and the
// to_string / to_hash accessors.
describe("Token", function() {
  describe("#constructor", function() {
    it("should set instance variables passed in the options hash", function() {
      // Declared locally; the bare assignment used before created a global.
      var token = new TwitterCldr.Token({"type":"my_type", "value":"my_value"});
      expect(token.type).toEqual("my_type");
      expect(token.value).toEqual("my_value");
    });
  });

  describe("#to_string", function() {
    it("should return the token's value", function() {
      expect(new TwitterCldr.Token({"value":"my_value"}).to_string()).toEqual("my_value");
    });
  });

  describe("#to_hash", function() {
    it("should return the token's attributes as a hash", function() {
      var properties = {"type":"my_type", "value":"my_value"};
      expect(new TwitterCldr.Token(properties).to_hash()).toEqual(properties);
    });
  });
});
@@ -0,0 +1,163 @@
1
+ // Copyright 2012 Twitter, Inc
2
+ // http://www.apache.org/licenses/LICENSE-2.0
3
+
4
+ var TwitterCldr = require('../../../../lib/assets/javascripts/twitter_cldr/en.js');
5
+
6
+ beforeEach(function() {
7
// Custom matcher: passes when this.actual is an array with the same length as
// `expected` and, position by position, every key listed on the expected entry
// has a strictly equal value on the actual entry. Extra keys on the actual
// entries are ignored.
//
// expected - array of plain objects, e.g. { value: "a", type: "string" }.
// Returns true when every expected key matches, false otherwise.
var toEqualTokenList = function(expected) {
  var actual = this.actual;
  if (!(actual instanceof Array) || !(expected instanceof Array)) {
    return false;
  }
  if (actual.length !== expected.length) {
    return false;
  }
  for (var i = 0; i < expected.length; i++) {
    var wanted = expected[i];
    var token = actual[i];
    // A missing token can never satisfy an expectation; fail instead of throwing.
    if (token === null || token === undefined) {
      return false;
    }
    // `key` is declared here: the undeclared loop variable used before leaked
    // a global and throws a ReferenceError in strict-mode code.
    for (var key in wanted) {
      if (wanted[key] !== token[key]) {
        return false;
      }
    }
  }
  return true;
};
22
+ this.addMatchers({
23
+ toEqualTokenList : toEqualTokenList
24
+ });
25
+ });
26
+
27
// Specs for TwitterCldr.UnicodeRegexTokenizer, written as a table: each case
// names the spec, the pattern handed to tokenize(), and the tokens expected
// back in order.
describe("UnicodeRegexTokenizer", function() {
  var tokenizer = new TwitterCldr.UnicodeRegexTokenizer();

  // Builds one expected-token entry for the toEqualTokenList matcher.
  var tok = function(value, type) {
    return { 'value' : value, 'type' : type };
  };

  var cases = [
    {
      title : "should tokenize a regular regex",
      pattern : "^(ab)xy$",
      tokens : [
        tok("^", "negate"),
        tok("(", "special_char"),
        tok("a", "string"),
        tok("b", "string"),
        tok(")", "special_char"),
        tok("x", "string"),
        tok("y", "string"),
        tok("$", "special_char")
      ]
    },
    {
      title : "should tokenize a regex containing a basic character class",
      pattern : "a[bc]d",
      tokens : [
        tok("a", "string"),
        tok("[", "open_bracket"),
        tok("b", "string"),
        tok("c", "string"),
        tok("]", "close_bracket"),
        tok("d", "string")
      ]
    },
    {
      title : "should tokenize a regex containing unicode character sets",
      pattern : "\\p{Zs}[:Lu:]",
      tokens : [
        tok("\\p{Zs}", "character_set"),
        tok("[:Lu:]", "character_set")
      ]
    },
    {
      title : "should tokenize a regex containing escaped characters",
      pattern : "^[a\\b]\\$",
      tokens : [
        tok("^", "negate"),
        tok("[", "open_bracket"),
        tok("a", "string"),
        tok("\\b", "escaped_character"),
        tok("]", "close_bracket"),
        tok("\\$", "escaped_character")
      ]
    },
    {
      title : "should tokenize a regex containing basic character ranges",
      pattern : "[a-z0-9]|[ab]",
      tokens : [
        tok("[", "open_bracket"),
        tok("a", "string"),
        tok("-", "dash"),
        tok("z", "string"),
        tok("0", "string"),
        tok("-", "dash"),
        tok("9", "string"),
        tok("]", "close_bracket"),
        tok("|", "pipe"),
        tok("[", "open_bracket"),
        tok("a", "string"),
        tok("b", "string"),
        tok("]", "close_bracket")
      ]
    },
    {
      title : "should tokenize a regex containing escaped unicode characters",
      pattern : "\\u0020[\\u0123-\\u0155]",
      tokens : [
        tok("\\u0020", "unicode_char"),
        tok("[", "open_bracket"),
        tok("\\u0123", "unicode_char"),
        tok("-", "dash"),
        tok("\\u0155", "unicode_char"),
        tok("]", "close_bracket")
      ]
    },
    {
      title : "should tokenize a regex containing variable substitutions",
      pattern : "$CR(?:ab)[$LF]",
      tokens : [
        tok("$CR", "variable"),
        tok("(", "special_char"),
        tok("?", "special_char"),
        tok(":", "special_char"),
        tok("a", "string"),
        tok("b", "string"),
        tok(")", "special_char"),
        tok("[", "open_bracket"),
        tok("$LF", "variable"),
        tok("]", "close_bracket")
      ]
    },
    {
      title : "should tokenize a regex containing multichar strings",
      pattern : "[{foo}bar]",
      tokens : [
        tok("[", "open_bracket"),
        tok("{foo}", "multichar_string"),
        tok("b", "string"),
        tok("a", "string"),
        tok("r", "string"),
        tok("]", "close_bracket")
      ]
    },
    {
      title : "should tokenize a regex containing negated character sets",
      pattern : "[[:^N:]\\P{L}]",
      tokens : [
        tok("[", "open_bracket"),
        tok("[:^N:]", "negated_character_set"),
        tok("\\P{L}", "negated_character_set"),
        tok("]", "close_bracket")
      ]
    },
    {
      title : "should tokenize a regex containing some of everything",
      pattern : "^[a-zb]?[^[\\p{Z}\\u0020-\\u007f]-[\\P{L}]-[[:N:]\\u0123]][:^CC:]*[{foo}]+$",
      tokens : [
        tok("^", "negate"),
        tok("[", "open_bracket"),
        tok("a", "string"),
        tok("-", "dash"),
        tok("z", "string"),
        tok("b", "string"),
        tok("]", "close_bracket"),
        tok("?", "special_char"),
        tok("[", "open_bracket"),
        tok("^", "negate"),
        tok("[", "open_bracket"),
        tok("\\p{Z}", "character_set"),
        tok("\\u0020", "unicode_char"),
        tok("-", "dash"),
        tok("\\u007f", "unicode_char"),
        tok("]", "close_bracket"),
        tok("-", "dash"),
        tok("[", "open_bracket"),
        tok("\\P{L}", "negated_character_set"),
        tok("]", "close_bracket"),
        tok("-", "dash"),
        tok("[", "open_bracket"),
        tok("[:N:]", "character_set"),
        tok("\\u0123", "unicode_char"),
        tok("]", "close_bracket"),
        tok("]", "close_bracket"),
        tok("[:^CC:]", "negated_character_set"),
        tok("*", "special_char"),
        tok("[", "open_bracket"),
        tok("{foo}", "multichar_string"),
        tok("]", "close_bracket"),
        tok("+", "special_char"),
        tok("$", "special_char")
      ]
    }
  ];

  // Register one spec per table row, in table order; tokenize() still runs
  // inside the spec body, not while the suite is being defined.
  cases.forEach(function(example) {
    it(example.title, function() {
      expect(tokenizer.tokenize(example.pattern)).toEqualTokenList(example.tokens);
    });
  });
});