twitter_cldr_js 2.3.2 → 2.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -5
- data/History.txt +8 -0
- data/README.md +69 -1
- data/Rakefile +0 -9
- data/lib/assets/javascripts/twitter_cldr/af.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ar.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/be.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/bg.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/bn.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ca.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/cs.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/cy.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/da.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/de-CH.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/de.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/el.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-150.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-AU.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-CA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-GB.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-IE.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-SG.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-ZA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-419.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-CO.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-MX.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/es-US.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/es.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/eu.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fa.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fil.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/fr-BE.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fr-CA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fr-CH.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/fr.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ga.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/gl.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/he.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/hi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/hr.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/hu.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/id.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/is.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/it-CH.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/it.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/ja.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ko.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/lv.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/msa.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/nl.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/no.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/pl.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/pt.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ro.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/ru.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sk.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sq.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/sr.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sv.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ta.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/th.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/tr.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/uk.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/ur.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/vi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/zh-cn.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/zh-tw.js +2042 -142
- data/lib/twitter_cldr/js/compiler.rb +26 -5
- data/lib/twitter_cldr/js/mustache/calendars/datetime.coffee +1 -4
- data/lib/twitter_cldr/js/mustache/numbers/numbers.coffee +10 -4
- data/lib/twitter_cldr/js/mustache/parsers/parser.coffee +32 -0
- data/lib/twitter_cldr/js/mustache/parsers/segmentation_parser.coffee +89 -0
- data/lib/twitter_cldr/js/mustache/parsers/symbol_table.coffee +14 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_class.coffee +51 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_range.coffee +19 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_set.coffee +36 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/component.coffee +48 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/literal.coffee +44 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/unicode_string.coffee +23 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex_parser.coffee +189 -0
- data/lib/twitter_cldr/js/mustache/plurals/rules.coffee +7 -5
- data/lib/twitter_cldr/js/mustache/shared/break_iterator.coffee +148 -0
- data/lib/twitter_cldr/js/mustache/shared/code_point.coffee +121 -0
- data/lib/twitter_cldr/js/mustache/shared/unicode_regex.coffee +41 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/composite_token.coffee +11 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/segmentation_tokenizer.coffee +24 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/token.coffee +14 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/tokenizer.coffee +83 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/unicode_regex/unicode_regex_tokenizer.coffee +39 -0
- data/lib/twitter_cldr/js/mustache/utilities.coffee +45 -0
- data/lib/twitter_cldr/js/mustache/utils/code_points.coffee +23 -0
- data/lib/twitter_cldr/js/mustache/utils/range.coffee +16 -0
- data/lib/twitter_cldr/js/mustache/utils/range_set.coffee +195 -0
- data/lib/twitter_cldr/js/renderers.rb +39 -10
- data/lib/twitter_cldr/js/renderers/calendars/timespan_renderer.rb +1 -1
- data/lib/twitter_cldr/js/renderers/numbers/numbers_renderer.rb +16 -9
- data/lib/twitter_cldr/js/renderers/parsers/parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/segmentation_parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/symbol_table.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_class.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_range.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_set.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/component.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/literal.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/unicode_string.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex_parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_renderer.rb +27 -28
- data/lib/twitter_cldr/js/renderers/shared/break_iterator_renderer.rb +50 -0
- data/lib/twitter_cldr/js/renderers/shared/code_point_renderer.rb +103 -0
- data/lib/twitter_cldr/js/renderers/shared/unicode_regex_renderer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/composite_token.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/segmentation_tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/token.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/unicode_regex/unicode_regex_tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/code_points.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/range.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/range_set.rb +18 -0
- data/lib/twitter_cldr/js/tasks/tasks.rb +1 -1
- data/lib/twitter_cldr/js/version.rb +1 -1
- data/spec/js/calendars/datetime.ru.spec.js +17 -0
- data/spec/js/calendars/timespan.ru.spec.js +20 -0
- data/spec/js/numbers/abbreviated/abbreviated_number.spec.js +5 -5
- data/spec/js/numbers/abbreviated/long_decimal.ru.spec.js +24 -0
- data/spec/js/numbers/currency.spec.js +1 -1
- data/spec/js/parsers/parser.spec.js +74 -0
- data/spec/js/parsers/segmentation_parser.spec.js +67 -0
- data/spec/js/parsers/symbol_table.spec.js +20 -0
- data/spec/js/parsers/unicode_regex/character_class.spec.js +121 -0
- data/spec/js/parsers/unicode_regex/character_range.spec.js +17 -0
- data/spec/js/parsers/unicode_regex/character_set.spec.js +17 -0
- data/spec/js/parsers/unicode_regex/literal.spec.js +30 -0
- data/spec/js/parsers/unicode_regex/unicode_string.spec.js +17 -0
- data/spec/js/parsers/unicode_regex_parser.spec.js +76 -0
- data/spec/js/plurals/plural_rules.spec.js +21 -0
- data/spec/js/shared/break_iterator.spec.js +68 -0
- data/spec/js/shared/code_point.spec.js +89 -0
- data/spec/js/shared/unicode_regex.spec.js +201 -0
- data/spec/js/tokenizers/composite_token.spec.js +28 -0
- data/spec/js/tokenizers/segmentation_tokenizer.spec.js +22 -0
- data/spec/js/tokenizers/token.spec.js +25 -0
- data/spec/js/tokenizers/unicode_regex/unicode_regex_tokenizer.spec.js +163 -0
- data/spec/js/utilities.spec.js +47 -0
- data/spec/js/utils/code_points.spec.js +49 -0
- data/spec/js/utils/range_set.spec.js +248 -0
- data/twitter_cldr_js.gemspec +8 -6
- metadata +128 -34
- data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_compiler.rb +0 -93
- data/spec/ruby/renderers/plurals/plural_rules_compiler_spec.rb +0 -56
- data/spec/ruby/spec_helper.rb +0 -11
@@ -0,0 +1,89 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
// Specs for TwitterCldr.CodePoint: parsing of the decomposition field handed
// to the constructor, and lookup of code points by unicode property.
describe("CodePoint", function() {
  // Resets the two caches stored on TwitterCldr.CodePoint so no spec sees
  // values cached by another one.
  var clear_cache = function () {
    TwitterCldr.CodePoint.composition_exclusion_cache = {};
    TwitterCldr.CodePoint.block_cache = {};
  };

  // Builds a CodePoint from a fixed unicode data row in which only the
  // decomposition field (sixth entry) varies between specs.
  var build_code_point = function (decomposition) {
    var unicode_data = ['17D1', 'KHMER SIGN VIRIAM', 'Mn', '0', 'NSM', decomposition, "", "", "", 'N', "", "", "", "", ""];
    return new TwitterCldr.CodePoint(unicode_data);
  };

  beforeEach(function () {
    clear_cache();
  });

  afterEach(function() {
    clear_cache();
  });

  describe("#initialize", function() {

    describe("when decomposition is canonical", function() {
      var code_point = build_code_point('0028 007A 0029');

      it("parses decomposition mapping", function() {
        expect(code_point.decomposition()).toEqual([0x28, 0x7A, 0x29]);
      });

      it("initializes compatibility tag as nil", function() {
        expect(code_point.compatibility_decomposition_tag()).toBe(null);
      });

      it("returns false from is_compatibility_decomposition", function() {
        expect(code_point.is_compatibility_decomposition()).toBe(false);
      });
    });

    describe("when decomposition is compatibility", function() {
      // A leading "<tag>" marks the mapping as a compatibility decomposition.
      var code_point = build_code_point('<font> 0028 007A 0029');

      it("parses decomposition mapping", function() {
        expect(code_point.decomposition()).toEqual([0x28, 0x7A, 0x29]);
      });

      it("initializes compatibility decomposition tag", function() {
        expect(code_point.compatibility_decomposition_tag()).toEqual('font');
      });

      it("returns true from is_compatibility_decomposition", function() {
        expect(code_point.is_compatibility_decomposition()).toBe(true);
      });
    });

    describe("when decomposition is empty", function() {
      var code_point = build_code_point("");

      it("parses decomposition mapping", function() {
        expect(code_point.decomposition()).toBe(null);
      });

      it("initializes compatibility tag as nil", function() {
        expect(code_point.compatibility_decomposition_tag()).toBe(null);
      });

      it("returns false from is_compatibility_decomposition", function() {
        expect(code_point.is_compatibility_decomposition()).toBe(false);
      });
    });
  });

  describe("#code_points_for_property", function() {
    // NOTE(review): `toEqualRange` is not defined in this spec file; it is
    // assumed to be a custom matcher registered globally elsewhere - confirm.
    it("returns code points for the given unicode property and value", function() {
      // Declared locally; the original assigned to an undeclared global.
      var cps = TwitterCldr.CodePoint.code_points_for_property("line_break", "CM");
      expect(cps instanceof Array).toBe(true);
      expect(cps[0]).toEqualRange(new TwitterCldr.Range(0, 8));

      cps = TwitterCldr.CodePoint.code_points_for_property("sentence_break", "Extend");
      expect(cps instanceof Array).toBe(true);
      expect(cps[0]).toEqualRange(new TwitterCldr.Range(768, 879));

      cps = TwitterCldr.CodePoint.code_points_for_property("word_break", "Hebrew_Letter");
      expect(cps instanceof Array).toBe(true);
      expect(cps[0]).toEqualRange(new TwitterCldr.Range(1488, 1514));
    });
  });
});
|
@@ -0,0 +1,201 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
// Registers the `toMatchUnicodeRegexExactly` matcher before every spec.
beforeEach(function() {
  this.addMatchers({
    // Passes when `expected` is a TwitterCldr.UnicodeRegex and matching the
    // actual value against its regexp string yields a match equal to the
    // entire actual value. Any other kind of `expected` fails the matcher.
    toMatchUnicodeRegexExactly : function(expected) {
      if (expected instanceof TwitterCldr.UnicodeRegex) {
        var found = this.actual.match(expected.to_regexp_str());
        if (found === null) {
          return false;
        }
        return this.actual === found[0];
      }
      return false;
    }
  });
});
|
17
|
+
// Specs for TwitterCldr.UnicodeRegex: compiling pattern strings, rendering
// them as regexp strings, substituting symbol-table variables, and matching.
describe("UnicodeRegex", function() {
  // Compiles `str` with an empty string as the second argument
  // (presumably the regexp modifiers - confirm) and an optional symbol table.
  var compile = function (str, symbol_table) {
    return TwitterCldr.UnicodeRegex.compile(str, "", symbol_table);
  };

  // Wraps a single code point with Utilities.pack_array so it can be used as
  // the matcher's actual value (presumably yields a one-character string).
  var from_code_point = function (code_point) {
    return TwitterCldr.Utilities.pack_array([code_point]);
  };

  var NON_BREAKING_SPACE = 160;
  var OGHAM_SPACE_MARK = 5760;

  var tokenizer = new TwitterCldr.UnicodeRegexTokenizer();
  var symbol_table = new TwitterCldr.SymbolTable({
    "$FOO" : tokenizer.tokenize("[g-k]"),
    "$BAR" : tokenizer.tokenize("[p-s]")
  });

  describe("basic operations", function() {
    var regex = compile("[abc]");

    describe("#compile", function() {
      it("should return a UnicodeRegex, parsed and ready to go", function() {
        expect(regex instanceof TwitterCldr.UnicodeRegex).toBe(true);
      });
    });

    describe("#to_regexp_str", function() {
      it("should return the string representation of this regex", function() {
        expect(regex.to_regexp_str()).toEqual("(?:[\\u0061-\\u0063])");
      });
    });

    describe("#to_regexp", function() {
      it("should return a Javascript Regexp", function() {
        expect(regex.to_regexp() instanceof RegExp).toBe(true);
      });

      it("should properly turn various basic regexes into strings", function() {
        expect(compile("^abc$").to_regexp_str()).toEqual("^(?:\\u0061)(?:\\u0062)(?:\\u0063)$");
        expect(compile("a(b)c").to_regexp_str()).toEqual("(?:\\u0061)((?:\\u0062))(?:\\u0063)");
        expect(compile("a(?:b)c").to_regexp_str()).toEqual("(?:\\u0061)(?:(?:\\u0062))(?:\\u0063)");
        expect(compile("a{1,3}").to_regexp_str()).toEqual("(?:\\u0061){1,3}");
        expect(compile("[abc]").to_regexp_str()).toEqual("(?:[\\u0061-\\u0063])");
      });

      it("should properly turn various complex regexes into strings", function() {
        expect(compile("[a-z0-9]").to_regexp_str()).toEqual("(?:[\\u0030-\\u0039]|[\\u0061-\\u007a])");
        expect(compile("[\\u0067-\\u0071]").to_regexp_str()).toEqual("(?:[\\u0067-\\u0071])");
      });

      it("should properly substitute variables", function() {
        expect(compile("$FOO$BAR", symbol_table).to_regexp_str()).toEqual("(?:[\\u0067-\\u006b])(?:[\\u0070-\\u0073])");
      });
    });
  });

  describe("with a few variables", function() {
    describe("#match", function() {
      it("should substitute variables from the symbol_table", function() {
        var regex = compile("$FOO $BAR", symbol_table);
        expect("h r").toMatchUnicodeRegexExactly(regex);
        expect("j q").toMatchUnicodeRegexExactly(regex);
        expect("h t").not.toMatchUnicodeRegexExactly(regex);
        expect("c s").not.toMatchUnicodeRegexExactly(regex);
      });
    });
  });

  // Previously titled "should match a regex with a capturing group", which
  // described only one of the five specs grouped here.
  describe("matching basic regexes", function() {
    describe("#match", function() {
      it("should match a regex with no char class", function() {
        var regex = compile("^abc$");
        expect("abc").toMatchUnicodeRegexExactly(regex);
        expect("cba").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex with a capturing group", function() {
        var regex = compile("a(b)c");
        var match = regex.match("abc");
        expect(match).not.toBe(null);
        expect(match[1]).toEqual("b");
      });

      it("should match a regex with a non-capturing group", function() {
        var regex = compile("a(?:b)c");
        var match = regex.match("abc");
        expect(match).not.toBe(null);
        // Only the full match is present: the group must not capture.
        expect(match.length).toEqual(1);
      });

      it("should match a regex with a quantifier", function() {
        var regex = compile("a{1,3}");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect("aa").toMatchUnicodeRegexExactly(regex);
        expect("aaa").toMatchUnicodeRegexExactly(regex);
        expect("aaaa").not.toMatchUnicodeRegexExactly(regex);
        expect("b").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex with a basic char class", function() {
        var regex = compile("[abc]");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect("b").toMatchUnicodeRegexExactly(regex);
        expect("c").toMatchUnicodeRegexExactly(regex);
        expect("ab").not.toMatchUnicodeRegexExactly(regex);
        expect("d").not.toMatchUnicodeRegexExactly(regex);
      });
    });
  });

  describe("matching complex character classes", function() {
    describe("#match", function() {
      it("should match a regex with a char class containing a range", function() {
        var regex = compile("[a-z0-9]");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect("m").toMatchUnicodeRegexExactly(regex);
        expect("z").toMatchUnicodeRegexExactly(regex);
        expect("0").toMatchUnicodeRegexExactly(regex);
        expect("3").toMatchUnicodeRegexExactly(regex);
        expect("9").toMatchUnicodeRegexExactly(regex);
        expect("a0").not.toMatchUnicodeRegexExactly(regex);
        expect("m4").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex with a char class containing a unicode range", function() {
        var regex = compile("[\\u0067-\\u0071]"); // g-q
        expect("g").toMatchUnicodeRegexExactly(regex);
        expect("q").toMatchUnicodeRegexExactly(regex);
        expect("h").toMatchUnicodeRegexExactly(regex);
        expect("z").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex containing a character set", function() {
        var regex = compile("[\\p{Zs}]");
        expect(from_code_point(NON_BREAKING_SPACE)).toMatchUnicodeRegexExactly(regex);
        expect(from_code_point(OGHAM_SPACE_MARK)).toMatchUnicodeRegexExactly(regex);
        expect("a").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex containing a negated character set", function() {
        var regex = compile("[\\P{Zs}]");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect(from_code_point(NON_BREAKING_SPACE)).not.toMatchUnicodeRegexExactly(regex);
        expect(from_code_point(OGHAM_SPACE_MARK)).not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex containing a character set (alternate syntax)", function() {
        var regex = compile("[[:Zs:]]");
        expect(from_code_point(NON_BREAKING_SPACE)).toMatchUnicodeRegexExactly(regex);
        expect(from_code_point(OGHAM_SPACE_MARK)).toMatchUnicodeRegexExactly(regex);
        expect("a").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex containing a negated character set (alternate syntax)", function() {
        var regex = compile("[[:^Zs:]]");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect(from_code_point(NON_BREAKING_SPACE)).not.toMatchUnicodeRegexExactly(regex);
        expect(from_code_point(OGHAM_SPACE_MARK)).not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex with a character set and some quantifiers", function() {
        var regex = compile("[\\u0067-\\u0071]+");
        expect("gg").toMatchUnicodeRegexExactly(regex);
        expect("gh").toMatchUnicodeRegexExactly(regex);
        expect("qjk").toMatchUnicodeRegexExactly(regex);
        expect("").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex that uses special switches inside the char class", function() {
        var regex = compile("[\\w]+");
        expect("a").toMatchUnicodeRegexExactly(regex);
        expect("abc").toMatchUnicodeRegexExactly(regex);
        expect("a0b_1c2").toMatchUnicodeRegexExactly(regex);
        expect("$@#").not.toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex that uses negated special switches inside the char class", function() {
        var regex = compile("[\\W]+");
        expect("a").not.toMatchUnicodeRegexExactly(regex);
        expect("abc").not.toMatchUnicodeRegexExactly(regex);
        expect("a0b_1c2").not.toMatchUnicodeRegexExactly(regex);
        expect("$@#").toMatchUnicodeRegexExactly(regex);
      });

      it("should match a regex with a complicated expression inside the char class", function() {
        // Original author's reading of the pattern:
        // not [separators U space-tilde] diff [letters diff numbers].
        // NOTE(review): how "^" and chained "-" combine is decided by the
        // parser, which is not visible from this spec - confirm there.
        var regex = compile("[^[\\p{Z}\\u0020-\\u007f]-[\\p{L}]-[\\p{N}]]");
        expect(" ").toMatchUnicodeRegexExactly(regex);
        expect(",").toMatchUnicodeRegexExactly(regex);
        expect("a").not.toMatchUnicodeRegexExactly(regex);
      });
    });
  });
});
|
@@ -0,0 +1,28 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
// Specs for TwitterCldr.CompositeToken, which wraps an array of Token objects.
// The suite was previously labeled "Token", which misreported these specs
// under the name of the separate Token suite.
describe("CompositeToken", function() {
  // Builds a fresh composite of two tokens for each spec. Everything is
  // declared locally; the original leaked these names as implicit globals.
  var build_composite_token = function () {
    var token_0 = new TwitterCldr.Token({"type":"my_type_0", "value":"my_value_0"});
    var token_1 = new TwitterCldr.Token({"type":"my_type_1", "value":"my_value_1"});
    return new TwitterCldr.CompositeToken([token_0, token_1]);
  };

  describe("#constructor", function() {
    it("should set an array of tokens", function() {
      var composite_token = build_composite_token();

      expect(composite_token.tokens.map(function(token){return token.type;})).toEqual(["my_type_0", "my_type_1"]);
      expect(composite_token.tokens.map(function(token){return token.value;})).toEqual(["my_value_0", "my_value_1"]);
    });
  });

  describe("#to_string", function() {
    it("should return the content", function() {
      var composite_token = build_composite_token();

      // The expected string is the token values concatenated in order.
      expect(composite_token.to_string()).toEqual("my_value_0my_value_1");
    });
  });
});
|
@@ -0,0 +1,22 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
// Specs for TwitterCldr.SegmentationTokenizer: variables plus the two
// segmentation operators, "÷" (type "break") and "×" (type "no_break").
// NOTE(review): `toEqualTokenList` is not defined in this spec file; it has to
// be registered globally by another spec before these run - confirm load order.
describe("SegmentationTokenizer", function() {
  var tokenizer = new TwitterCldr.SegmentationTokenizer();

  // Title corrected: this spec covers the break operator. It previously
  // duplicated the title of the non-break spec below.
  it("should tokenize an expression with a break", function() {
    expect(tokenizer.tokenize("$CB ÷ $SP")).toEqualTokenList([
      { 'value' : "$CB", 'type' : "variable" },
      { 'value' : "÷",   'type' : "break" },
      { 'value' : "$SP", 'type' : "variable" }
    ]);
  });

  it("should tokenize an expression with a non-break", function() {
    expect(tokenizer.tokenize("$ATerm × $Numeric")).toEqualTokenList([
      { 'value' : "$ATerm",   'type' : "variable" },
      { 'value' : "×",        'type' : "no_break" },
      { 'value' : "$Numeric", 'type' : "variable" }
    ]);
  });
});
|
@@ -0,0 +1,25 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
// Specs for TwitterCldr.Token: construction from an options hash and the
// to_string / to_hash accessors.
describe("Token", function() {
  describe("#constructor", function() {
    it("should set instance variables passed in the options hash", function() {
      // Declared locally; the original assigned to an undeclared global.
      var token = new TwitterCldr.Token({"type":"my_type", "value":"my_value"});
      expect(token.type).toEqual("my_type");
      expect(token.value).toEqual("my_value");
    });
  });

  describe("#to_string", function() {
    it("should return the token's value", function() {
      expect(new TwitterCldr.Token({"value":"my_value"}).to_string()).toEqual("my_value");
    });
  });

  describe("#to_hash", function() {
    it("should return the token's attributes as a hash", function() {
      // Declared locally; the original assigned to an undeclared global.
      var properties = {"type":"my_type", "value":"my_value"};
      expect(new TwitterCldr.Token(properties).to_hash()).toEqual(properties);
    });
  });
});
|
@@ -0,0 +1,163 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
// Registers the `toEqualTokenList` matcher before every spec.
beforeEach(function() {
  /**
   * Compares the actual token list against an array of expected hashes.
   *
   * Passes when both are arrays of equal length and, position by position,
   * every enumerable key of the expected hash is strictly equal to the same
   * key on the actual token. Extra properties on the actual tokens are ignored.
   *
   * @param {Array} expected - hashes describing the expected tokens.
   * @returns {boolean} true when every expected key/value pair is found.
   */
  var toEqualTokenList = function (expected) {
    var actual = this.actual;

    if (!(actual instanceof Array) || !(expected instanceof Array)) {
      return false;
    }
    if (actual.length !== expected.length) {
      return false;
    }

    for (var i = 0; i < actual.length; i++) {
      var wanted = expected[i];
      // `key` is declared here; the original leaked it as an implicit global,
      // which is a ReferenceError under strict mode.
      for (var key in wanted) {
        if (wanted[key] !== actual[i][key]) {
          return false;
        }
      }
    }
    return true;
  };

  this.addMatchers({
    toEqualTokenList : toEqualTokenList
  });
});
|
26
|
+
|
27
|
+
// Specs for TwitterCldr.UnicodeRegexTokenizer: each spec tokenizes a pattern
// string and compares the resulting values and types with an expected list.
describe("UnicodeRegexTokenizer", function() {
  var tokenizer = new TwitterCldr.UnicodeRegexTokenizer();

  // Shorthand for one expected token hash.
  var tok = function (value, type) {
    return { 'value' : value, 'type' : type };
  };

  it("should tokenize a regular regex", function() {
    var expected = [
      tok("^", "negate"),
      tok("(", "special_char"),
      tok("a", "string"),
      tok("b", "string"),
      tok(")", "special_char"),
      tok("x", "string"),
      tok("y", "string"),
      tok("$", "special_char")
    ];
    expect(tokenizer.tokenize("^(ab)xy$")).toEqualTokenList(expected);
  });

  it("should tokenize a regex containing a basic character class", function() {
    var expected = [
      tok("a", "string"),
      tok("[", "open_bracket"),
      tok("b", "string"),
      tok("c", "string"),
      tok("]", "close_bracket"),
      tok("d", "string")
    ];
    expect(tokenizer.tokenize("a[bc]d")).toEqualTokenList(expected);
  });

  it("should tokenize a regex containing unicode character sets", function() {
    var expected = [
      tok("\\p{Zs}", "character_set"),
      tok("[:Lu:]", "character_set")
    ];
    expect(tokenizer.tokenize("\\p{Zs}[:Lu:]")).toEqualTokenList(expected);
  });

  it("should tokenize a regex containing escaped characters", function() {
    var expected = [
      tok("^", "negate"),
      tok("[", "open_bracket"),
      tok("a", "string"),
      tok("\\b", "escaped_character"),
      tok("]", "close_bracket"),
      tok("\\$", "escaped_character")
    ];
    expect(tokenizer.tokenize("^[a\\b]\\$")).toEqualTokenList(expected);
  });

  it("should tokenize a regex containing basic character ranges", function() {
    var expected = [
      tok("[", "open_bracket"),
      tok("a", "string"),
      tok("-", "dash"),
      tok("z", "string"),
      tok("0", "string"),
      tok("-", "dash"),
      tok("9", "string"),
      tok("]", "close_bracket"),
      tok("|", "pipe"),
      tok("[", "open_bracket"),
      tok("a", "string"),
      tok("b", "string"),
      tok("]", "close_bracket")
    ];
    expect(tokenizer.tokenize("[a-z0-9]|[ab]")).toEqualTokenList(expected);
  });

  it("should tokenize a regex containing escaped unicode characters", function() {
    var expected = [
      tok("\\u0020", "unicode_char"),
      tok("[", "open_bracket"),
      tok("\\u0123", "unicode_char"),
      tok("-", "dash"),
      tok("\\u0155", "unicode_char"),
      tok("]", "close_bracket")
    ];
    expect(tokenizer.tokenize("\\u0020[\\u0123-\\u0155]")).toEqualTokenList(expected);
  });

  it("should tokenize a regex containing variable substitutions", function() {
    var expected = [
      tok("$CR", "variable"),
      tok("(", "special_char"),
      tok("?", "special_char"),
      tok(":", "special_char"),
      tok("a", "string"),
      tok("b", "string"),
      tok(")", "special_char"),
      tok("[", "open_bracket"),
      tok("$LF", "variable"),
      tok("]", "close_bracket")
    ];
    expect(tokenizer.tokenize("$CR(?:ab)[$LF]")).toEqualTokenList(expected);
  });

  it("should tokenize a regex containing multichar strings", function() {
    var expected = [
      tok("[", "open_bracket"),
      tok("{foo}", "multichar_string"),
      tok("b", "string"),
      tok("a", "string"),
      tok("r", "string"),
      tok("]", "close_bracket")
    ];
    expect(tokenizer.tokenize("[{foo}bar]")).toEqualTokenList(expected);
  });

  it("should tokenize a regex containing negated character sets", function() {
    var expected = [
      tok("[", "open_bracket"),
      tok("[:^N:]", "negated_character_set"),
      tok("\\P{L}", "negated_character_set"),
      tok("]", "close_bracket")
    ];
    expect(tokenizer.tokenize("[[:^N:]\\P{L}]")).toEqualTokenList(expected);
  });

  it("should tokenize a regex containing some of everything", function() {
    var expected = [
      tok("^", "negate"),
      tok("[", "open_bracket"),
      tok("a", "string"),
      tok("-", "dash"),
      tok("z", "string"),
      tok("b", "string"),
      tok("]", "close_bracket"),
      tok("?", "special_char"),
      tok("[", "open_bracket"),
      tok("^", "negate"),
      tok("[", "open_bracket"),
      tok("\\p{Z}", "character_set"),
      tok("\\u0020", "unicode_char"),
      tok("-", "dash"),
      tok("\\u007f", "unicode_char"),
      tok("]", "close_bracket"),
      tok("-", "dash"),
      tok("[", "open_bracket"),
      tok("\\P{L}", "negated_character_set"),
      tok("]", "close_bracket"),
      tok("-", "dash"),
      tok("[", "open_bracket"),
      tok("[:N:]", "character_set"),
      tok("\\u0123", "unicode_char"),
      tok("]", "close_bracket"),
      tok("]", "close_bracket"),
      tok("[:^CC:]", "negated_character_set"),
      tok("*", "special_char"),
      tok("[", "open_bracket"),
      tok("{foo}", "multichar_string"),
      tok("]", "close_bracket"),
      tok("+", "special_char"),
      tok("$", "special_char")
    ];
    expect(tokenizer.tokenize("^[a-zb]?[^[\\p{Z}\\u0020-\\u007f]-[\\P{L}]-[[:N:]\\u0123]][:^CC:]*[{foo}]+$")).toEqualTokenList(expected);
  });
});
|