twitter_cldr_js 2.3.2 → 2.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -5
- data/History.txt +8 -0
- data/README.md +69 -1
- data/Rakefile +0 -9
- data/lib/assets/javascripts/twitter_cldr/af.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ar.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/be.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/bg.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/bn.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ca.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/cs.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/cy.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/da.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/de-CH.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/de.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/el.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-150.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-AU.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-CA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-GB.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-IE.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-SG.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-ZA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-419.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-CO.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-MX.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/es-US.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/es.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/eu.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fa.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fil.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/fr-BE.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fr-CA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fr-CH.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/fr.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ga.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/gl.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/he.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/hi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/hr.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/hu.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/id.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/is.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/it-CH.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/it.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/ja.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ko.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/lv.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/msa.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/nl.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/no.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/pl.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/pt.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ro.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/ru.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sk.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sq.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/sr.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sv.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ta.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/th.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/tr.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/uk.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/ur.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/vi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/zh-cn.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/zh-tw.js +2042 -142
- data/lib/twitter_cldr/js/compiler.rb +26 -5
- data/lib/twitter_cldr/js/mustache/calendars/datetime.coffee +1 -4
- data/lib/twitter_cldr/js/mustache/numbers/numbers.coffee +10 -4
- data/lib/twitter_cldr/js/mustache/parsers/parser.coffee +32 -0
- data/lib/twitter_cldr/js/mustache/parsers/segmentation_parser.coffee +89 -0
- data/lib/twitter_cldr/js/mustache/parsers/symbol_table.coffee +14 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_class.coffee +51 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_range.coffee +19 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_set.coffee +36 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/component.coffee +48 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/literal.coffee +44 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/unicode_string.coffee +23 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex_parser.coffee +189 -0
- data/lib/twitter_cldr/js/mustache/plurals/rules.coffee +7 -5
- data/lib/twitter_cldr/js/mustache/shared/break_iterator.coffee +148 -0
- data/lib/twitter_cldr/js/mustache/shared/code_point.coffee +121 -0
- data/lib/twitter_cldr/js/mustache/shared/unicode_regex.coffee +41 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/composite_token.coffee +11 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/segmentation_tokenizer.coffee +24 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/token.coffee +14 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/tokenizer.coffee +83 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/unicode_regex/unicode_regex_tokenizer.coffee +39 -0
- data/lib/twitter_cldr/js/mustache/utilities.coffee +45 -0
- data/lib/twitter_cldr/js/mustache/utils/code_points.coffee +23 -0
- data/lib/twitter_cldr/js/mustache/utils/range.coffee +16 -0
- data/lib/twitter_cldr/js/mustache/utils/range_set.coffee +195 -0
- data/lib/twitter_cldr/js/renderers.rb +39 -10
- data/lib/twitter_cldr/js/renderers/calendars/timespan_renderer.rb +1 -1
- data/lib/twitter_cldr/js/renderers/numbers/numbers_renderer.rb +16 -9
- data/lib/twitter_cldr/js/renderers/parsers/parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/segmentation_parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/symbol_table.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_class.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_range.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_set.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/component.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/literal.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/unicode_string.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex_parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_renderer.rb +27 -28
- data/lib/twitter_cldr/js/renderers/shared/break_iterator_renderer.rb +50 -0
- data/lib/twitter_cldr/js/renderers/shared/code_point_renderer.rb +103 -0
- data/lib/twitter_cldr/js/renderers/shared/unicode_regex_renderer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/composite_token.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/segmentation_tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/token.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/unicode_regex/unicode_regex_tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/code_points.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/range.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/range_set.rb +18 -0
- data/lib/twitter_cldr/js/tasks/tasks.rb +1 -1
- data/lib/twitter_cldr/js/version.rb +1 -1
- data/spec/js/calendars/datetime.ru.spec.js +17 -0
- data/spec/js/calendars/timespan.ru.spec.js +20 -0
- data/spec/js/numbers/abbreviated/abbreviated_number.spec.js +5 -5
- data/spec/js/numbers/abbreviated/long_decimal.ru.spec.js +24 -0
- data/spec/js/numbers/currency.spec.js +1 -1
- data/spec/js/parsers/parser.spec.js +74 -0
- data/spec/js/parsers/segmentation_parser.spec.js +67 -0
- data/spec/js/parsers/symbol_table.spec.js +20 -0
- data/spec/js/parsers/unicode_regex/character_class.spec.js +121 -0
- data/spec/js/parsers/unicode_regex/character_range.spec.js +17 -0
- data/spec/js/parsers/unicode_regex/character_set.spec.js +17 -0
- data/spec/js/parsers/unicode_regex/literal.spec.js +30 -0
- data/spec/js/parsers/unicode_regex/unicode_string.spec.js +17 -0
- data/spec/js/parsers/unicode_regex_parser.spec.js +76 -0
- data/spec/js/plurals/plural_rules.spec.js +21 -0
- data/spec/js/shared/break_iterator.spec.js +68 -0
- data/spec/js/shared/code_point.spec.js +89 -0
- data/spec/js/shared/unicode_regex.spec.js +201 -0
- data/spec/js/tokenizers/composite_token.spec.js +28 -0
- data/spec/js/tokenizers/segmentation_tokenizer.spec.js +22 -0
- data/spec/js/tokenizers/token.spec.js +25 -0
- data/spec/js/tokenizers/unicode_regex/unicode_regex_tokenizer.spec.js +163 -0
- data/spec/js/utilities.spec.js +47 -0
- data/spec/js/utils/code_points.spec.js +49 -0
- data/spec/js/utils/range_set.spec.js +248 -0
- data/twitter_cldr_js.gemspec +8 -6
- metadata +128 -34
- data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_compiler.rb +0 -93
- data/spec/ruby/renderers/plurals/plural_rules_compiler_spec.rb +0 -56
- data/spec/ruby/spec_helper.rb +0 -11
@@ -0,0 +1,67 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
describe("SegmentationParser", function() {
|
7
|
+
var tokenizer = new TwitterCldr.SegmentationTokenizer();
|
8
|
+
var parser = new TwitterCldr.SegmentationParser();
|
9
|
+
var tokenize = function (text) {
|
10
|
+
return tokenizer.tokenize(text);
|
11
|
+
}
|
12
|
+
var parse = function (text, options) {
|
13
|
+
return parser.parse(text, options);
|
14
|
+
}
|
15
|
+
var symbol_table = new TwitterCldr.SymbolTable({"$FOO" : tokenize("[abc]")});
|
16
|
+
describe("#parse", function() {
|
17
|
+
it("should parse a rule with a break", function() {
|
18
|
+
var rule = parse(tokenize("[a-z] ÷ [0-9]"));
|
19
|
+
expect(rule.left.to_regexp_str()).toEqual("^(?:[\\u0061-\\u007a])");
|
20
|
+
expect(rule.right.to_regexp_str()).toEqual("^(?:[\\u0030-\\u0039])");
|
21
|
+
expect(rule.boundary_symbol).toEqual("break");
|
22
|
+
});
|
23
|
+
it("should parse a rule with a non-break", function() {
|
24
|
+
var rule = parse(tokenize("[a-z] × [0-9]"));
|
25
|
+
expect(rule.regex.to_regexp_str()).toEqual("^(?:[\\u0061-\\u007a])(?:[\\u0030-\\u0039])");
|
26
|
+
expect(rule.boundary_symbol).toEqual("no_break");
|
27
|
+
});
|
28
|
+
it("should parse a rule containing a variable", function() {
|
29
|
+
var rule = parse(tokenize("$FOO × bar"), {'symbol_table' : symbol_table});
|
30
|
+
expect(rule.regex.to_regexp_str()).toEqual("^(?:[\\u0061-\\u0063])(?:\\u0062)(?:\\u0061)(?:\\u0072)");
|
31
|
+
expect(rule.boundary_symbol).toEqual("no_break");
|
32
|
+
});
|
33
|
+
});
|
34
|
+
describe("SegmentationParser.BreakRule", function() {
|
35
|
+
var rule = parse(tokenize("[a-z] ÷ [0-9]"));
|
36
|
+
it("rule should be the right type", function() {
|
37
|
+
expect(rule instanceof TwitterCldr.SegmentationParser.BreakRule)
|
38
|
+
});
|
39
|
+
it("should match and return the right offset and text", function() {
|
40
|
+
var match = rule.match("c7");
|
41
|
+
expect(match.boundary_offset).toEqual(1);
|
42
|
+
expect(match.text).toEqual("c7");
|
43
|
+
});
|
44
|
+
it("should not match if the input string doesn't contain a matching right- and/or left-hand side", function() {
|
45
|
+
expect(rule.match("C7")).toBe(null);
|
46
|
+
expect(rule.match("cc")).toBe(null);
|
47
|
+
expect(rule.match("CC")).toBe(null);
|
48
|
+
});
|
49
|
+
});
|
50
|
+
describe("SegmentationParser.NoBreakRule", function() {
|
51
|
+
var rule = parse(tokenize("[a-z] × [0-9]"));
|
52
|
+
it("rule should be the right type", function() {
|
53
|
+
expect(rule instanceof TwitterCldr.SegmentationParser.NoBreakRule)
|
54
|
+
});
|
55
|
+
it("should match and return the right offset and text", function() {
|
56
|
+
var match = rule.match("c7");
|
57
|
+
// non-break rules report the boundary offset at the end of the match (maybe that's wrong?)
|
58
|
+
expect(match.boundary_offset).toEqual(2)
|
59
|
+
expect(match.text).toEqual("c7")
|
60
|
+
});
|
61
|
+
it("should not match if the input string doesn't contain matching text", function() {
|
62
|
+
expect(rule.match("C7")).toBe(null);
|
63
|
+
expect(rule.match("cc")).toBe(null);
|
64
|
+
expect(rule.match("CC")).toBe(null);
|
65
|
+
});
|
66
|
+
});
|
67
|
+
});
|
@@ -0,0 +1,20 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
describe("SymbolTable", function() {
|
7
|
+
beforeEach(function() {
|
8
|
+
table = new TwitterCldr.SymbolTable ({"a":"b", "c":"d"});
|
9
|
+
});
|
10
|
+
describe("#fetch", function() {
|
11
|
+
it("should be able to retrieve values for symbols", function() {
|
12
|
+
expect(table.fetch("a")).toEqual("b");
|
13
|
+
expect(table.fetch("z")).not.toBeDefined();
|
14
|
+
});
|
15
|
+
it("should be able to add then fetch new values for symbols", function() {
|
16
|
+
table.add("e", "f");
|
17
|
+
expect(table.fetch("e")).toEqual("f");
|
18
|
+
});
|
19
|
+
});
|
20
|
+
});
|
@@ -0,0 +1,121 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
describe("Character Class", function() {
|
7
|
+
var tokenizer = new TwitterCldr.UnicodeRegexTokenizer();
|
8
|
+
var parser = new TwitterCldr.UnicodeRegexParser();
|
9
|
+
var tokenize = function (text) {
|
10
|
+
return tokenizer.tokenize(text);
|
11
|
+
}
|
12
|
+
var parse = function (text, options) {
|
13
|
+
return parser.parse(text, options);
|
14
|
+
}
|
15
|
+
var char_class_from = function (elements) {
|
16
|
+
return elements[0];
|
17
|
+
}
|
18
|
+
describe("#to_set", function() {
|
19
|
+
it("unions together char classes with no explicit operator", function() {
|
20
|
+
var char_class = char_class_from(parse(tokenize("[[a][b]]")));
|
21
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([new TwitterCldr.Range(97, 98)]));
|
22
|
+
});
|
23
|
+
it("unions together other entities within char classes when operator is not explicit", function() {
|
24
|
+
var char_class = char_class_from(parse(tokenize("[a-z0-9\\u0123]")));
|
25
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([new TwitterCldr.Range(48, 57), new TwitterCldr.Range(97, 122), new TwitterCldr.Range(291, 291)]));
|
26
|
+
});
|
27
|
+
it("intersects correctly", function() {
|
28
|
+
var char_class = char_class_from(parse(tokenize("[[a-m]&[g-z]]")));
|
29
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([new TwitterCldr.Range(103, 109)]));
|
30
|
+
});
|
31
|
+
it("finds symmetric differences correctly", function() {
|
32
|
+
var char_class = char_class_from(parse(tokenize("[[a-m]-[g-z]]")));
|
33
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([new TwitterCldr.Range(97, 102), new TwitterCldr.Range(110, 122)]));
|
34
|
+
});
|
35
|
+
it("computes sets for nested expressions", function() {
|
36
|
+
// (97..109) C (104..106)  -- "C" denotes intersection (the "&" operator)
|
37
|
+
// = (104..106)
|
38
|
+
// ((104..106) U (107..122)) subtr ((104..106) C (107..122))
|
39
|
+
// = (104..122) subtr ()
|
40
|
+
// = (104..122)
|
41
|
+
var char_class = char_class_from(parse(tokenize("[[[a-m]&[h-j]]-[k-z]]")));
|
42
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([new TwitterCldr.Range(104, 122)]));
|
43
|
+
});
|
44
|
+
it("pulls in ranges for unicode character sets", function() {
|
45
|
+
var char_class = char_class_from(parse(tokenize("[\\p{Zs}]")));
|
46
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([
|
47
|
+
new TwitterCldr.Range(32, 32),
|
48
|
+
new TwitterCldr.Range(160, 160),
|
49
|
+
new TwitterCldr.Range(5760, 5760),
|
50
|
+
new TwitterCldr.Range(6158, 6158),
|
51
|
+
new TwitterCldr.Range(8192, 8202),
|
52
|
+
new TwitterCldr.Range(8239, 8239),
|
53
|
+
new TwitterCldr.Range(8287, 8287),
|
54
|
+
new TwitterCldr.Range(12288, 12288)
|
55
|
+
]));
|
56
|
+
});
|
57
|
+
it("computes unions between unicode character sets", function() {
|
58
|
+
var char_class = char_class_from(parse(tokenize("[[\\p{Zs}][\\p{Cc}]]")));
|
59
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([
|
60
|
+
new TwitterCldr.Range(0, 1),
|
61
|
+
new TwitterCldr.Range(8, 32),
|
62
|
+
new TwitterCldr.Range(127, 160),
|
63
|
+
new TwitterCldr.Range(5760, 5760),
|
64
|
+
new TwitterCldr.Range(6158, 6158),
|
65
|
+
new TwitterCldr.Range(8192, 8202),
|
66
|
+
new TwitterCldr.Range(8239, 8239),
|
67
|
+
new TwitterCldr.Range(8287, 8287),
|
68
|
+
new TwitterCldr.Range(12288, 12288)
|
69
|
+
]));
|
70
|
+
});
|
71
|
+
it("computes intersections between unicode character sets", function() {
|
72
|
+
var char_class = char_class_from(parse(tokenize("[[\\p{Zs}]&[\\u2000-\\u202B]]")));
|
73
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([new TwitterCldr.Range(8192, 8202)]));
|
74
|
+
});
|
75
|
+
it("supports negating character sets", function() {
|
76
|
+
var char_class = char_class_from(parse(tokenize("[^\\u2000-\\u202B]")));
|
77
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([
|
78
|
+
new TwitterCldr.Range(0, 1),
|
79
|
+
new TwitterCldr.Range(8, 8191),
|
80
|
+
new TwitterCldr.Range(8236, 55295),
|
81
|
+
new TwitterCldr.Range(57344, 65535), //1114111),
|
82
|
+
]));
|
83
|
+
});
|
84
|
+
it("supports literal and escaped characters", function() {
|
85
|
+
var char_class = char_class_from(parse(tokenize("[abc\\edf\\g]")));
|
86
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([new TwitterCldr.Range(97, 103)]));
|
87
|
+
});
|
88
|
+
it("supports special switch characters", function() {
|
89
|
+
var char_class = char_class_from(parse(tokenize("[\\w]"))); // a-z, A-Z, 0-9, _
|
90
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([
|
91
|
+
new TwitterCldr.Range(48, 57),
|
92
|
+
new TwitterCldr.Range(65, 90),
|
93
|
+
new TwitterCldr.Range(95, 95),
|
94
|
+
new TwitterCldr.Range(97, 122),
|
95
|
+
]));
|
96
|
+
});
|
97
|
+
it("supports negated switch characters", function() {
|
98
|
+
char_class = char_class_from(parse(tokenize("[\\D]"))) // i.e. NOT \d
|
99
|
+
expect(char_class.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([
|
100
|
+
new TwitterCldr.Range(0, 1),
|
101
|
+
new TwitterCldr.Range(8, 47),
|
102
|
+
new TwitterCldr.Range(58, 55295),
|
103
|
+
new TwitterCldr.Range(57344, 65535), //1114111),
|
104
|
+
]));
|
105
|
+
});
|
106
|
+
});
|
107
|
+
describe("#to_regexp_str", function() {
|
108
|
+
it("wraps ranges in square brackets", function() {
|
109
|
+
var char_class = char_class_from(parse(tokenize("[a-z]")));
|
110
|
+
expect(char_class.to_regexp_str()).toEqual("(?:[\\u0061-\\u007a])");
|
111
|
+
});
|
112
|
+
it("hex-encodes and wraps sequential characters to isolate bytes", function() {
|
113
|
+
var char_class = char_class_from(parse(tokenize("[{foo}]")));
|
114
|
+
expect(char_class.to_regexp_str()).toEqual("(?:(?:\\u0066)(?:\\u006f)(?:\\u006f))");
|
115
|
+
});
|
116
|
+
it("combines multiple components with 'or' pipe characters", function() {
|
117
|
+
var char_class = char_class_from(parse(tokenize("[{foo}abc]")));
|
118
|
+
expect(char_class.to_regexp_str()).toEqual("(?:(?:\\u0066)(?:\\u006f)(?:\\u006f)|[\\u0061-\\u0063])");
|
119
|
+
});
|
120
|
+
});
|
121
|
+
});
|
@@ -0,0 +1,17 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
describe("Character Range", function() {
|
7
|
+
describe("#to_set", function() {
|
8
|
+
it("should return a range between the initial and the final values", function() {
|
9
|
+
range = new TwitterCldr.CharacterRange (
|
10
|
+
new TwitterCldr.UnicodeString([97]),
|
11
|
+
new TwitterCldr.UnicodeString([98])
|
12
|
+
);
|
13
|
+
|
14
|
+
expect (range.to_set().to_array(true)).toEqualRangeArray([new TwitterCldr.Range(97,98)]);
|
15
|
+
});
|
16
|
+
});
|
17
|
+
});
|
@@ -0,0 +1,17 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
describe("Character Set", function() {
|
7
|
+
describe("#to_set", function() {
|
8
|
+
it("should return a set containing codepoints for the given general property", function() {
|
9
|
+
char_set = new TwitterCldr.CharacterSet ("Zs");
|
10
|
+
expect(char_set.to_set().to_array(true)).toEqualRangeArray([32, 160, 5760, 6158, new TwitterCldr.Range(8192,8202), 8239, 8287, 12288]);
|
11
|
+
});
|
12
|
+
it("should return a set containing codepoints for the given named property", function() {
|
13
|
+
char_set = new TwitterCldr.CharacterSet ("Sentence_Break=Sp");
|
14
|
+
expect(char_set.to_set().to_array(true)).toEqualRangeArray([9, new TwitterCldr.Range(11,12), 32, 160, 5760, new TwitterCldr.Range(8192,8202), 8239, 8287, 12288]);
|
15
|
+
});
|
16
|
+
});
|
17
|
+
});
|
@@ -0,0 +1,30 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
describe("Literal", function() {
|
7
|
+
describe("#to_set", function() {
|
8
|
+
it("should set an array of tokens", function() {
|
9
|
+
literal = new TwitterCldr.Literal ("a");
|
10
|
+
expect(literal.to_set().to_array(true)).toEqual([97]);
|
11
|
+
});
|
12
|
+
it("should return escaped characters with no special meaning as codepoints", function() {
|
13
|
+
literal = new TwitterCldr.Literal ("\\a");
|
14
|
+
expect(literal.to_set().to_array(true)).toEqual([97]);
|
15
|
+
});
|
16
|
+
it("should convert special regex switches to their range equivalents", function() {
|
17
|
+
literal = new TwitterCldr.Literal ("\\d"); // digit
|
18
|
+
expect(literal.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([new TwitterCldr.Range(48,57)]));
|
19
|
+
});
|
20
|
+
it("should convert negated special regex switches to their range equivalents", function() {
|
21
|
+
literal = new TwitterCldr.Literal ("\\D"); // NOT digit
|
22
|
+
expect(literal.to_set()).toEqualRangeSet(new TwitterCldr.RangeSet([
|
23
|
+
new TwitterCldr.Range(0, 1),
|
24
|
+
new TwitterCldr.Range(8, 47),
|
25
|
+
new TwitterCldr.Range(58, 55295),
|
26
|
+
new TwitterCldr.Range(57344, 65535),//1114111),
|
27
|
+
]));
|
28
|
+
});
|
29
|
+
});
|
30
|
+
});
|
@@ -0,0 +1,17 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
describe("Unicode String", function() {
|
7
|
+
describe("#to_set", function() {
|
8
|
+
it("should return a zero-length range when representing a single codepoint", function() {
|
9
|
+
str = new TwitterCldr.UnicodeString([97]);
|
10
|
+
expect (str.to_set().to_array()).toEqualRangeArray([new TwitterCldr.Range(97,97)]);
|
11
|
+
});
|
12
|
+
it("should return a range containing the codepoint array as both the first and last elements", function() {
|
13
|
+
str = new TwitterCldr.UnicodeString([97,98,99]);
|
14
|
+
expect (str.to_set().to_array()).toEqualRangeArray([new TwitterCldr.Range([97,98,99], [97,98,99])]);
|
15
|
+
});
|
16
|
+
});
|
17
|
+
});
|
@@ -0,0 +1,76 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
describe("UnicodeRegexParser", function() {
|
7
|
+
var tokenizer = new TwitterCldr.UnicodeRegexTokenizer();
|
8
|
+
var parser = new TwitterCldr.UnicodeRegexParser();
|
9
|
+
var tokenize = function (text) {
|
10
|
+
return tokenizer.tokenize(text);
|
11
|
+
}
|
12
|
+
var parse = function (text, options) {
|
13
|
+
return parser.parse(text, options);
|
14
|
+
}
|
15
|
+
|
16
|
+
describe("#parse", function() {
|
17
|
+
it("identifies ranges", function() {
|
18
|
+
var elements = parse(tokenize("[a-z]"));
|
19
|
+
expect(elements[0] instanceof TwitterCldr.CharacterClass).toBe(true);
|
20
|
+
var root = elements[0].root;
|
21
|
+
expect(root instanceof TwitterCldr.CharacterRange).toBe(true);
|
22
|
+
expect(root.initial.codepoints).toEqual(TwitterCldr.Utilities.unpack_string("a"));
|
23
|
+
expect(root.final.codepoints).toEqual(TwitterCldr.Utilities.unpack_string("z"));
|
24
|
+
});
|
25
|
+
it("replaces variables", function() {
|
26
|
+
var symbol_table = new TwitterCldr.SymbolTable({"$VAR" : tokenize("\\p{L}")});
|
27
|
+
var elements = parse(tokenize("($VAR)?"), {'symbol_table' : symbol_table});
|
28
|
+
expect(elements[1] instanceof TwitterCldr.CharacterSet).toBe(true);
|
29
|
+
expect(elements[1].property_value).toEqual("L");
|
30
|
+
});
|
31
|
+
it("handles character and negated character sets", function() {
|
32
|
+
var elements = parse(tokenize("\\p{L}[:^P:]\\P{L}[:P:]"));
|
33
|
+
|
34
|
+
var element = elements[0];
|
35
|
+
expect(element instanceof TwitterCldr.CharacterSet).toBe(true);
|
36
|
+
expect(element.property_value).toEqual("L");
|
37
|
+
|
38
|
+
element = elements[1];
|
39
|
+
expect(element instanceof TwitterCldr.CharacterClass).toBe(true);
|
40
|
+
expect(element.root.child.property_value).toEqual("P");
|
41
|
+
expect(element.root.operator).toEqual("negate");
|
42
|
+
|
43
|
+
element = elements[2];
|
44
|
+
expect(element instanceof TwitterCldr.CharacterClass).toBe(true);
|
45
|
+
expect(element.root.child.property_value).toEqual("L");
|
46
|
+
|
47
|
+
element = elements[3];
|
48
|
+
expect(element instanceof TwitterCldr.CharacterSet).toBe(true);
|
49
|
+
expect(element.property_value).toEqual("P");
|
50
|
+
});
|
51
|
+
it("handles unicode characters", function() {
|
52
|
+
var elements = parse(tokenize("\\u0123"));
|
53
|
+
expect(elements[0] instanceof TwitterCldr.UnicodeString).toBe(true);
|
54
|
+
expect(elements[0].codepoints).toEqual([291]);
|
55
|
+
});
|
56
|
+
it("handles multichar and escaped unicode strings", function() {
|
57
|
+
var elements = parse(tokenize("\\g{abc}"));
|
58
|
+
expect(elements[0] instanceof TwitterCldr.Literal).toBe(true);
|
59
|
+
expect(elements[0].text).toEqual("\\g");
|
60
|
+
expect(elements[1] instanceof TwitterCldr.UnicodeString).toBe(true);
|
61
|
+
expect(elements[1].codepoints).toEqual([97, 98, 99]);
|
62
|
+
});
|
63
|
+
it("handles special chars", function() {
|
64
|
+
var elements = parse(tokenize("^(?:)$"));
|
65
|
+
for (var i = 0; i < elements.length; i ++) {
|
66
|
+
expect(elements[i] instanceof TwitterCldr.Literal).toBe(true);
|
67
|
+
}
|
68
|
+
expect(elements[0].text).toEqual("^");
|
69
|
+
expect(elements[1].text).toEqual("(");
|
70
|
+
expect(elements[2].text).toEqual("?");
|
71
|
+
expect(elements[3].text).toEqual(":");
|
72
|
+
expect(elements[4].text).toEqual(")");
|
73
|
+
expect(elements[5].text).toEqual("$");
|
74
|
+
});
|
75
|
+
});
|
76
|
+
});
|
@@ -8,6 +8,17 @@ describe("PluralRules", function() {
|
|
8
8
|
it("returns an array of all English plural rules", function() {
|
9
9
|
expect(TwitterCldr.PluralRules.all()).toEqual(["one", "other"]);
|
10
10
|
});
|
11
|
+
|
12
|
+
it("returns an array of the plural rules for the given type", function() {
|
13
|
+
var actual_rules = TwitterCldr.PluralRules.all('ordinal');
|
14
|
+
var expected_rules = ["one", "two", "few", "other"];
|
15
|
+
|
16
|
+
expect(actual_rules.length).toEqual(expected_rules.length);
|
17
|
+
|
18
|
+
for (actual_rule_idx in actual_rules) {
|
19
|
+
expect(expected_rules).toContain(actual_rules[actual_rule_idx]);
|
20
|
+
}
|
21
|
+
});
|
11
22
|
});
|
12
23
|
|
13
24
|
describe("#rule_for", function() {
|
@@ -24,5 +35,15 @@ describe("PluralRules", function() {
|
|
24
35
|
it("returns 'other' for the number 0", function() {
|
25
36
|
expect(TwitterCldr.PluralRules.rule_for(0)).toEqual("other");
|
26
37
|
});
|
38
|
+
|
39
|
+
it("returns correct ordinal plurals", function() {
|
40
|
+
expect(TwitterCldr.PluralRules.rule_for(1, 'ordinal')).toEqual("one");
|
41
|
+
expect(TwitterCldr.PluralRules.rule_for(2, 'ordinal')).toEqual("two");
|
42
|
+
expect(TwitterCldr.PluralRules.rule_for(3, 'ordinal')).toEqual("few");
|
43
|
+
expect(TwitterCldr.PluralRules.rule_for(4, 'ordinal')).toEqual("other");
|
44
|
+
expect(TwitterCldr.PluralRules.rule_for(11, 'ordinal')).toEqual("other");
|
45
|
+
expect(TwitterCldr.PluralRules.rule_for(12, 'ordinal')).toEqual("other");
|
46
|
+
expect(TwitterCldr.PluralRules.rule_for(22, 'ordinal')).toEqual("two");
|
47
|
+
});
|
27
48
|
});
|
28
49
|
});
|
@@ -0,0 +1,68 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
|
5
|
+
|
6
|
+
describe("BreakIterator", function() {
|
7
|
+
var iterator = new TwitterCldr.BreakIterator ("en", {"use_uli_exceptions" : true});
|
8
|
+
describe("#each_sentence", function() {
|
9
|
+
it("should return an array", function() {
|
10
|
+
expect(iterator.each_sentence("foo bar") instanceof Array).toBe(true);
|
11
|
+
});
|
12
|
+
|
13
|
+
it("splits a simple string into sentences", function() {
|
14
|
+
var str = "The. Quick. Brown. Fox.";
|
15
|
+
expect(iterator.each_sentence(str)).toEqual([
|
16
|
+
"The.", " Quick.", " Brown.", " Fox."
|
17
|
+
]);
|
18
|
+
});
|
19
|
+
|
20
|
+
it("does not split on commas, for example", function() {
|
21
|
+
var str = "The. Quick, brown. Fox.";
|
22
|
+
expect(iterator.each_sentence(str)).toEqual([
|
23
|
+
"The.", " Quick, brown.", " Fox."
|
24
|
+
]);
|
25
|
+
});
|
26
|
+
|
27
|
+
it("does not split periods in the midst of other letters, eg. in a URL", function() {
|
28
|
+
var str = "Visit us. Go to http://translate.twitter.com.";
|
29
|
+
expect(iterator.each_sentence(str)).toEqual([
|
30
|
+
"Visit us.",
|
31
|
+
" Go to http://translate.twitter.com."
|
32
|
+
]);
|
33
|
+
});
|
34
|
+
|
35
|
+
it("splits on sentences that end with other kinds of punctuation", function() {
|
36
|
+
var str = "Help us translate! Speak another language? You really, really rock.";
|
37
|
+
expect(iterator.each_sentence(str)).toEqual([
|
38
|
+
"Help us translate!",
|
39
|
+
" Speak another language?",
|
40
|
+
" You really, really rock."
|
41
|
+
]);
|
42
|
+
});
|
43
|
+
|
44
|
+
describe("with ULI exceptions", function() {
|
45
|
+
it("does not split on certain abbreviations like Mr. and Mrs.", function() {
|
46
|
+
var str = "I really like Mrs. Patterson. She's nice.";
|
47
|
+
expect(iterator.each_sentence(str)).toEqual([
|
48
|
+
"I really like Mrs. Patterson.",
|
49
|
+
" She's nice."
|
50
|
+
]);
|
51
|
+
});
|
52
|
+
});
|
53
|
+
|
54
|
+
describe("without ULI exceptions", function() {
|
55
|
+
var iterator = new TwitterCldr.BreakIterator ("en", {"use_uli_exceptions" : false});
|
56
|
+
it("splits on certain abbreviations like Mr. and Mrs. (use ULI rules to avoid this behavior)", function() {
|
57
|
+
var str = "I really like Mrs. Patterson. She's nice.";
|
58
|
+
expect(iterator.each_sentence(str)).toEqual([
|
59
|
+
"I really like Mrs.",
|
60
|
+
" Patterson.",
|
61
|
+
" She's nice."
|
62
|
+
]);
|
63
|
+
});
|
64
|
+
});
|
65
|
+
});
|
66
|
+
});
|
67
|
+
|
68
|
+
|