RubyGems - twitter_cldr_js - Versions diffs - 2.3.2 → 2.4.0 - Mend

twitter_cldr_js 2.3.2 → 2.4.0

Files changed (153) hide show

checksums.yaml +4 -4
data/Gemfile +1 -5
data/History.txt +8 -0
data/README.md +69 -1
data/Rakefile +0 -9
data/lib/assets/javascripts/twitter_cldr/af.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/ar.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/be.js +2044 -144
data/lib/assets/javascripts/twitter_cldr/bg.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/bn.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/ca.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/cs.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/cy.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/da.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/de-CH.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/de.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/el.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/en-150.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/en-AU.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/en-CA.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/en-GB.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/en-IE.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/en-SG.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/en-ZA.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/en.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/es-419.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/es-CO.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/es-MX.js +2044 -144
data/lib/assets/javascripts/twitter_cldr/es-US.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/es.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/eu.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/fa.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/fi.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/fil.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/fr-BE.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/fr-CA.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/fr-CH.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/fr.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/ga.js +2044 -144
data/lib/assets/javascripts/twitter_cldr/gl.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/he.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/hi.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/hr.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/hu.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/id.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/is.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/it-CH.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/it.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/ja.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/ko.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/lv.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/msa.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/nl.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/no.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/pl.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/pt.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/ro.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/ru.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/sk.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/sq.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/sr.js +2043 -143
data/lib/assets/javascripts/twitter_cldr/sv.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/ta.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/th.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/tr.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/uk.js +2044 -144
data/lib/assets/javascripts/twitter_cldr/ur.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/vi.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/zh-cn.js +2042 -142
data/lib/assets/javascripts/twitter_cldr/zh-tw.js +2042 -142
data/lib/twitter_cldr/js/compiler.rb +26 -5
data/lib/twitter_cldr/js/mustache/calendars/datetime.coffee +1 -4
data/lib/twitter_cldr/js/mustache/numbers/numbers.coffee +10 -4
data/lib/twitter_cldr/js/mustache/parsers/parser.coffee +32 -0
data/lib/twitter_cldr/js/mustache/parsers/segmentation_parser.coffee +89 -0
data/lib/twitter_cldr/js/mustache/parsers/symbol_table.coffee +14 -0
data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_class.coffee +51 -0
data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_range.coffee +19 -0
data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_set.coffee +36 -0
data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/component.coffee +48 -0
data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/literal.coffee +44 -0
data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/unicode_string.coffee +23 -0
data/lib/twitter_cldr/js/mustache/parsers/unicode_regex_parser.coffee +189 -0
data/lib/twitter_cldr/js/mustache/plurals/rules.coffee +7 -5
data/lib/twitter_cldr/js/mustache/shared/break_iterator.coffee +148 -0
data/lib/twitter_cldr/js/mustache/shared/code_point.coffee +121 -0
data/lib/twitter_cldr/js/mustache/shared/unicode_regex.coffee +41 -0
data/lib/twitter_cldr/js/mustache/tokenizers/composite_token.coffee +11 -0
data/lib/twitter_cldr/js/mustache/tokenizers/segmentation_tokenizer.coffee +24 -0
data/lib/twitter_cldr/js/mustache/tokenizers/token.coffee +14 -0
data/lib/twitter_cldr/js/mustache/tokenizers/tokenizer.coffee +83 -0
data/lib/twitter_cldr/js/mustache/tokenizers/unicode_regex/unicode_regex_tokenizer.coffee +39 -0
data/lib/twitter_cldr/js/mustache/utilities.coffee +45 -0
data/lib/twitter_cldr/js/mustache/utils/code_points.coffee +23 -0
data/lib/twitter_cldr/js/mustache/utils/range.coffee +16 -0
data/lib/twitter_cldr/js/mustache/utils/range_set.coffee +195 -0
data/lib/twitter_cldr/js/renderers.rb +39 -10
data/lib/twitter_cldr/js/renderers/calendars/timespan_renderer.rb +1 -1
data/lib/twitter_cldr/js/renderers/numbers/numbers_renderer.rb +16 -9
data/lib/twitter_cldr/js/renderers/parsers/parser.rb +18 -0
data/lib/twitter_cldr/js/renderers/parsers/segmentation_parser.rb +18 -0
data/lib/twitter_cldr/js/renderers/parsers/symbol_table.rb +18 -0
data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_class.rb +18 -0
data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_range.rb +18 -0
data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_set.rb +18 -0
data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/component.rb +18 -0
data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/literal.rb +18 -0
data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/unicode_string.rb +18 -0
data/lib/twitter_cldr/js/renderers/parsers/unicode_regex_parser.rb +18 -0
data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_renderer.rb +27 -28
data/lib/twitter_cldr/js/renderers/shared/break_iterator_renderer.rb +50 -0
data/lib/twitter_cldr/js/renderers/shared/code_point_renderer.rb +103 -0
data/lib/twitter_cldr/js/renderers/shared/unicode_regex_renderer.rb +18 -0
data/lib/twitter_cldr/js/renderers/tokenizers/composite_token.rb +18 -0
data/lib/twitter_cldr/js/renderers/tokenizers/segmentation_tokenizer.rb +18 -0
data/lib/twitter_cldr/js/renderers/tokenizers/token.rb +18 -0
data/lib/twitter_cldr/js/renderers/tokenizers/tokenizer.rb +18 -0
data/lib/twitter_cldr/js/renderers/tokenizers/unicode_regex/unicode_regex_tokenizer.rb +18 -0
data/lib/twitter_cldr/js/renderers/utils/code_points.rb +18 -0
data/lib/twitter_cldr/js/renderers/utils/range.rb +18 -0
data/lib/twitter_cldr/js/renderers/utils/range_set.rb +18 -0
data/lib/twitter_cldr/js/tasks/tasks.rb +1 -1
data/lib/twitter_cldr/js/version.rb +1 -1
data/spec/js/calendars/datetime.ru.spec.js +17 -0
data/spec/js/calendars/timespan.ru.spec.js +20 -0
data/spec/js/numbers/abbreviated/abbreviated_number.spec.js +5 -5
data/spec/js/numbers/abbreviated/long_decimal.ru.spec.js +24 -0
data/spec/js/numbers/currency.spec.js +1 -1
data/spec/js/parsers/parser.spec.js +74 -0
data/spec/js/parsers/segmentation_parser.spec.js +67 -0
data/spec/js/parsers/symbol_table.spec.js +20 -0
data/spec/js/parsers/unicode_regex/character_class.spec.js +121 -0
data/spec/js/parsers/unicode_regex/character_range.spec.js +17 -0
data/spec/js/parsers/unicode_regex/character_set.spec.js +17 -0
data/spec/js/parsers/unicode_regex/literal.spec.js +30 -0
data/spec/js/parsers/unicode_regex/unicode_string.spec.js +17 -0
data/spec/js/parsers/unicode_regex_parser.spec.js +76 -0
data/spec/js/plurals/plural_rules.spec.js +21 -0
data/spec/js/shared/break_iterator.spec.js +68 -0
data/spec/js/shared/code_point.spec.js +89 -0
data/spec/js/shared/unicode_regex.spec.js +201 -0
data/spec/js/tokenizers/composite_token.spec.js +28 -0
data/spec/js/tokenizers/segmentation_tokenizer.spec.js +22 -0
data/spec/js/tokenizers/token.spec.js +25 -0
data/spec/js/tokenizers/unicode_regex/unicode_regex_tokenizer.spec.js +163 -0
data/spec/js/utilities.spec.js +47 -0
data/spec/js/utils/code_points.spec.js +49 -0
data/spec/js/utils/range_set.spec.js +248 -0
data/twitter_cldr_js.gemspec +8 -6
metadata +128 -34
data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_compiler.rb +0 -93
data/spec/ruby/renderers/plurals/plural_rules_compiler_spec.rb +0 -56
data/spec/ruby/spec_helper.rb +0 -11

data/spec/js/shared/code_point.spec.js ADDED Viewed

@@ -0,0 +1,89 @@
+// Copyright 2012 Twitter, Inc
+// http://www.apache.org/licenses/LICENSE-2.0
+var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
+describe("CodePoint", function() {
+  var clear_cache = function () {
+    TwitterCldr.CodePoint.composition_exclusion_cache = {};
+    TwitterCldr.CodePoint.block_cache = {};
+  };
+  beforeEach(function () {
+    clear_cache();
+  });
+  afterEach(function() {
+    clear_cache();
+  });
+  describe("#initialize", function() {
+    describe("when decomposition is canonical", function() {
+      var decomposition = '0028 007A 0029';
+      var unicode_data = ['17D1', 'KHMER SIGN VIRIAM', 'Mn', '0', 'NSM', decomposition, "", "", "", 'N', "", "", "", "", ""];
+      var code_point = new TwitterCldr.CodePoint(unicode_data);
+      it("parses decomposition mapping", function() {
+        expect(code_point.decomposition()).toEqual([0x28, 0x7A, 0x29]);
+      });
+      it("initializes compatibility tag as nil", function() {
+        expect(code_point.compatibility_decomposition_tag()).toBe(null);
+      });
+      it("returns false from is_compatibility_decomposition", function() {
+        expect(code_point.is_compatibility_decomposition()).toBe(false);
+      });
+    });
+    describe("when decomposition is compatibility", function() {
+      var decomposition = '<font> 0028 007A 0029';
+      var unicode_data = ['17D1', 'KHMER SIGN VIRIAM', 'Mn', '0', 'NSM', decomposition, "", "", "", 'N', "", "", "", "", ""];
+      var code_point = new TwitterCldr.CodePoint(unicode_data);
+      it("parses decomposition mapping", function() {
+        expect(code_point.decomposition()).toEqual([0x28, 0x7A, 0x29]);
+      });
+      it("initializes compatibility decomposition tag", function() {
+        expect(code_point.compatibility_decomposition_tag()).toEqual('font');
+      });
+      it("returns true from is_compatibility_decomposition", function() {
+        expect(code_point.is_compatibility_decomposition()).toBe(true);
+      });
+    });
+    describe("when decomposition is empty", function() {
+      var decomposition = "";
+      var unicode_data = ['17D1', 'KHMER SIGN VIRIAM', 'Mn', '0', 'NSM', decomposition, "", "", "", 'N', "", "", "", "", ""];
+      var code_point = new TwitterCldr.CodePoint(unicode_data);
+      it("parses decomposition mapping", function() {
+        expect(code_point.decomposition()).toBe(null);
+      });
+      it("initializes compatibility tag as nil", function() {
+        expect(code_point.compatibility_decomposition_tag()).toBe(null);
+      });
+      it("return false from is_compatibility_decomposition", function() {
+        expect(code_point.is_compatibility_decomposition()).toBe(false);
+      });
+    });
+  });
+  describe("#code_points_for_property", function() {
+    it("reutrns code points for the given unicode property and value", function() {
+      cps = TwitterCldr.CodePoint.code_points_for_property("line_break", "CM");
+      expect(cps instanceof Array).toBe(true);
+      expect(cps[0]).toEqualRange(new TwitterCldr.Range(0, 8));
+      cps = TwitterCldr.CodePoint.code_points_for_property("sentence_break", "Extend");
+      expect(cps instanceof Array).toBe(true);
+      expect(cps[0]).toEqualRange(new TwitterCldr.Range(768, 879));
+      cps = TwitterCldr.CodePoint.code_points_for_property("word_break", "Hebrew_Letter");
+      expect(cps instanceof Array).toBe(true);
+      expect(cps[0]).toEqualRange(new TwitterCldr.Range(1488, 1514));
+    });
+  });
+});

data/spec/js/shared/unicode_regex.spec.js ADDED Viewed

@@ -0,0 +1,201 @@
+// Copyright 2012 Twitter, Inc
+// http://www.apache.org/licenses/LICENSE-2.0
+var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
+beforeEach(function() {
+  var toMatchUnicodeRegexExactly = function(expected) {
+    if (!(expected instanceof TwitterCldr.UnicodeRegex))
+      return false;
+    var match = this.actual.match(expected.to_regexp_str());
+    return match !== null && this.actual === match[0];
+  };
+  this.addMatchers({
+    toMatchUnicodeRegexExactly : toMatchUnicodeRegexExactly,
+  });
+});
+describe("UnicodeRegex", function() {
+  var compile = function (str, symbol_table) {
+    return TwitterCldr.UnicodeRegex.compile(str, "", symbol_table);
+  };
+  var tokenizer = new TwitterCldr.UnicodeRegexTokenizer();
+  var symbol_table = new TwitterCldr.SymbolTable({
+    "$FOO" : tokenizer.tokenize("[g-k]"),
+    "$BAR" : tokenizer.tokenize("[p-s]")
+  });
+  describe("basic operations", function() {
+    var regex = compile("[abc]");
+    describe("#compile", function() {
+      it("should return a UnicodeRegex, parsed and ready to go", function() {
+        expect(regex instanceof TwitterCldr.UnicodeRegex).toBe(true);
+      });
+    });
+    describe("#to_regexp_str", function() {
+      it("should return the string representation of this regex", function() {
+        expect(regex.to_regexp_str()).toEqual("(?:[\\u0061-\\u0063])");
+      });
+    });
+    describe("#to_regexp", function() {
+      it("should return a Javascript Regexp", function() {
+        expect(regex.to_regexp() instanceof RegExp).toBe(true);
+      });
+      it("should properly turn various basic regexes into strings", function() {
+        expect(compile("^abc$").to_regexp_str()).toEqual("^(?:\\u0061)(?:\\u0062)(?:\\u0063)$");
+        expect(compile("a(b)c").to_regexp_str()).toEqual("(?:\\u0061)((?:\\u0062))(?:\\u0063)");
+        expect(compile("a(?:b)c").to_regexp_str()).toEqual("(?:\\u0061)(?:(?:\\u0062))(?:\\u0063)");
+        expect(compile("a{1,3}").to_regexp_str()).toEqual("(?:\\u0061){1,3}");
+        expect(compile("[abc]").to_regexp_str()).toEqual("(?:[\\u0061-\\u0063])");
+      });
+      it("should properly turn various complex regexes into strings", function() {
+        expect(compile("[a-z0-9]").to_regexp_str()).toEqual("(?:[\\u0030-\\u0039]|[\\u0061-\\u007a])");
+        expect(compile("[\\u0067-\\u0071]").to_regexp_str()).toEqual("(?:[\\u0067-\\u0071])");
+      });
+      it("should properly substitute variables", function() {
+        expect(compile("$FOO$BAR", symbol_table).to_regexp_str()).toEqual("(?:[\\u0067-\\u006b])(?:[\\u0070-\\u0073])");
+      });
+    });
+  });
+  describe("with a few variables", function() {
+    describe("#match", function() {
+      it("should substitute variables from the symbol_table", function() {
+        var regex = compile("$FOO $BAR", symbol_table);
+        expect("h r").toMatchUnicodeRegexExactly(regex);
+        expect("j q").toMatchUnicodeRegexExactly(regex);
+        expect("h t").not.toMatchUnicodeRegexExactly(regex);
+        expect("c s").not.toMatchUnicodeRegexExactly(regex);
+      });
+    });
+  });
+  describe("should match a regex with a capturing group", function() {
+    describe("#match", function() {
+      it("should match a regex with no char class", function() {
+        var regex = compile("^abc$");
+        expect("abc").toMatchUnicodeRegexExactly(regex);
+        expect("cba").not.toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex with a capturing group", function() {
+        var regex = compile("a(b)c");
+        var match = regex.match("abc");
+        expect(match).not.toBe(null);
+        expect(match[1]).toEqual("b");
+      });
+      it("should match a regex with a non-capturing group", function() {
+        var regex = compile("a(?:b)c");
+        var match = regex.match("abc");
+        expect(match).not.toBe(null);
+        expect(match.length).toEqual(1);
+      });
+      it("should match a regex with a quantifier", function() {
+        var regex = compile("a{1,3}");
+        expect("a").toMatchUnicodeRegexExactly(regex);
+        expect("aa").toMatchUnicodeRegexExactly(regex);
+        expect("aaa").toMatchUnicodeRegexExactly(regex);
+        expect("aaaa").not.toMatchUnicodeRegexExactly(regex);
+        expect("b").not.toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex with a basic char class", function() {
+        var regex = compile("[abc]");
+        expect("a").toMatchUnicodeRegexExactly(regex);
+        expect("b").toMatchUnicodeRegexExactly(regex);
+        expect("c").toMatchUnicodeRegexExactly(regex);
+        expect("ab").not.toMatchUnicodeRegexExactly(regex);
+        expect("d").not.toMatchUnicodeRegexExactly(regex);
+      });
+    });
+  });
+  describe("matching complex character classes", function() {
+    describe("#match", function() {
+      it("should match a regex with a char class containing a range", function() {
+        var regex = compile("[a-z0-9]");
+        expect("a").toMatchUnicodeRegexExactly(regex);
+        expect("m").toMatchUnicodeRegexExactly(regex);
+        expect("z").toMatchUnicodeRegexExactly(regex);
+        expect("0").toMatchUnicodeRegexExactly(regex);
+        expect("3").toMatchUnicodeRegexExactly(regex);
+        expect("9").toMatchUnicodeRegexExactly(regex);
+        expect("a0").not.toMatchUnicodeRegexExactly(regex);
+        expect("m4").not.toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex with a char class containing a unicode range", function() {
+        var regex = compile("[\\u0067-\\u0071]"); // g-q;
+        expect("g").toMatchUnicodeRegexExactly(regex);
+        expect("q").toMatchUnicodeRegexExactly(regex);
+        expect("h").toMatchUnicodeRegexExactly(regex);
+        expect("z").not.toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex containing a character set", function() {
+        var regex = compile("[\\p{Zs}]");
+        expect(TwitterCldr.Utilities.pack_array([160])).toMatchUnicodeRegexExactly(regex);  // non-breaking space.toMatchUnicodeRegexExactly(regex);
+        expect(TwitterCldr.Utilities.pack_array([5760])).toMatchUnicodeRegexExactly(regex);  // ogham space mark.toMatchUnicodeRegexExactly(regex);
+        expect("a").not.toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex containing a negated character set", function() {
+        var regex = compile("[\\P{Zs}]");
+        expect("a").toMatchUnicodeRegexExactly(regex);
+        expect(TwitterCldr.Utilities.pack_array([160])).not.toMatchUnicodeRegexExactly(regex);
+        expect(TwitterCldr.Utilities.pack_array([5760])).not.toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex containing a character set (alternate syntax)", function() {
+        var regex = compile("[[:Zs:]]");
+        expect(TwitterCldr.Utilities.pack_array([160])).toMatchUnicodeRegexExactly(regex);  // non-breaking space.toMatchUnicodeRegexExactly(regex);
+        expect(TwitterCldr.Utilities.pack_array([5760])).toMatchUnicodeRegexExactly(regex);  // ogham space mark.toMatchUnicodeRegexExactly(regex);
+        expect("a").not.toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex containing a negated character set (alternate syntax)", function() {
+        var regex = compile("[[:^Zs:]]");
+        expect("a").toMatchUnicodeRegexExactly(regex);
+        expect(TwitterCldr.Utilities.pack_array([160])).not.toMatchUnicodeRegexExactly(regex);
+        expect(TwitterCldr.Utilities.pack_array([5760])).not.toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex with a character set and some quantifiers", function() {
+        var regex = compile("[\\u0067-\\u0071]+");
+        expect("gg").toMatchUnicodeRegexExactly(regex);
+        expect("gh").toMatchUnicodeRegexExactly(regex);
+        expect("qjk").toMatchUnicodeRegexExactly(regex);
+        expect("").not.toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex that uses special switches inside the char class", function() {
+        var regex = compile("[\\w]+");
+        expect("a").toMatchUnicodeRegexExactly(regex);
+        expect("abc").toMatchUnicodeRegexExactly(regex);
+        expect("a0b_1c2").toMatchUnicodeRegexExactly(regex);
+        expect("$@#").not.toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex that uses negated special switches inside the char class", function() {
+        var regex = compile("[\\W]+");
+        expect("a").not.toMatchUnicodeRegexExactly(regex);
+        expect("abc").not.toMatchUnicodeRegexExactly(regex);
+        expect("a0b_1c2").not.toMatchUnicodeRegexExactly(regex);
+        expect("$@#").toMatchUnicodeRegexExactly(regex);
+      });
+      it("should match a regex with a complicated expression inside the char class", function() {
+        // not [separators U space-tilde] diff [letters diff numbers]  (diff is commutative)
+        var regex = compile("[^[\\p{Z}\\u0020-\\u007f]-[\\p{L}]-[\\p{N}]]");
+        expect(" ").toMatchUnicodeRegexExactly(regex);
+        expect(",").toMatchUnicodeRegexExactly(regex);
+        expect("a").not.toMatchUnicodeRegexExactly(regex);
+      });
+    });
+  });
+});

data/spec/js/tokenizers/composite_token.spec.js ADDED Viewed

@@ -0,0 +1,28 @@
+// Copyright 2012 Twitter, Inc
+// http://www.apache.org/licenses/LICENSE-2.0
+var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
+describe("Token", function() {
+  describe("#constructor", function() {
+    it("should set an array of tokens", function() {
+      token_0 = new TwitterCldr.Token({"type":"my_type_0", "value":"my_value_0"});
+      token_1 = new TwitterCldr.Token({"type":"my_type_1", "value":"my_value_1"});
+      composite_token = new TwitterCldr.CompositeToken ([token_0, token_1]);
+      expect(composite_token.tokens.map(function(token){return token.type;})).toEqual(["my_type_0", "my_type_1"]);
+      expect(composite_token.tokens.map(function(token){return token.value;})).toEqual(["my_value_0", "my_value_1"]);
+    });
+  });
+  describe("#to_string", function() {
+    it("should return the content", function() {
+      token_0 = new TwitterCldr.Token({"type":"my_type_0", "value":"my_value_0"});
+      token_1 = new TwitterCldr.Token({"type":"my_type_1", "value":"my_value_1"});
+      composite_token = new TwitterCldr.CompositeToken ([token_0, token_1]);
+      expect(composite_token.to_string()).toEqual("my_value_0my_value_1");
+    });
+  });
+});

data/spec/js/tokenizers/segmentation_tokenizer.spec.js ADDED Viewed

@@ -0,0 +1,22 @@
+// Copyright 2012 Twitter, Inc
+// http://www.apache.org/licenses/LICENSE-2.0
+var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
+describe("SegmentationTokenizer", function() {
+  var tokenizer = new TwitterCldr.SegmentationTokenizer();
+  it("should tokenize an expression with a non-break", function() {
+    expect(tokenizer.tokenize("$CB ÷ $SP")).toEqualTokenList([
+      { 'value' : "$CB", 'type' : "variable" },
+      { 'value' : "÷", 'type' : "break" },
+      { 'value' : "$SP", 'type' : "variable" }
+    ]);
+  });
+  it("should tokenize an expression with a non-break", function() {
+    expect(tokenizer.tokenize("$ATerm × $Numeric")).toEqualTokenList([
+      { 'value' : "$ATerm", 'type' : "variable" },
+      { 'value' : "×", 'type' : "no_break" },
+      { 'value' : "$Numeric", 'type' : "variable" }
+    ]);
+  });
+});

data/spec/js/tokenizers/token.spec.js ADDED Viewed

@@ -0,0 +1,25 @@
+// Copyright 2012 Twitter, Inc
+// http://www.apache.org/licenses/LICENSE-2.0
+var TwitterCldr = require('../../../lib/assets/javascripts/twitter_cldr/en.js');
+describe("Token", function() {
+  describe("#constructor", function() {
+    it("should set instance variables passed in the options hash", function() {
+      token = new TwitterCldr.Token({"type":"my_type", "value":"my_value"});
+      expect(token.type).toEqual("my_type");
+      expect(token.value).toEqual("my_value");
+    });
+  });
+  describe("#to_string", function() {
+    it("should return the token's value", function() {
+      expect(new TwitterCldr.Token({"value":"my_value"}).to_string()).toEqual("my_value");
+    });
+  });
+  describe("#to_hash", function() {
+    it("should return the token's attributes as a hash", function() {
+      properties = {"type":"my_type", "value":"my_value"};
+      expect(new TwitterCldr.Token(properties).to_hash()).toEqual(properties);
+    });
+  });
+});

data/spec/js/tokenizers/unicode_regex/unicode_regex_tokenizer.spec.js ADDED Viewed

@@ -0,0 +1,163 @@
+// Copyright 2012 Twitter, Inc
+// http://www.apache.org/licenses/LICENSE-2.0
+var TwitterCldr = require('../../../../lib/assets/javascripts/twitter_cldr/en.js');
+beforeEach(function() {
+  var toEqualTokenList = function (expected) {
+    if (!(this.actual instanceof Array) || !(expected instanceof Array))
+      return false;
+    if (this.actual.length !== expected.length)
+      return false;
+    for (var i = 0; i < this.actual.length; i++) {
+      var hash = expected[i];
+      for (key in expected[i])
+      {
+        if (expected[i][key] !== this.actual[i][key])
+          return false;
+      }
+    }
+    return true;
+  }
+  this.addMatchers({
+    toEqualTokenList : toEqualTokenList
+  });
+});
+describe("UnicodeRegexTokenizer", function() {
+  var tokenizer = new TwitterCldr.UnicodeRegexTokenizer();
+  it("should tokenize a regular regex", function() {
+    expect(tokenizer.tokenize("^(ab)xy$")).toEqualTokenList([
+      { 'value' : "^", 'type' : "negate" },
+      { 'value' : "(", 'type' : "special_char" },
+      { 'value' : "a", 'type' : "string" },
+      { 'value' : "b", 'type' : "string" },
+      { 'value' : ")", 'type' : "special_char" },
+      { 'value' : "x", 'type' : "string" },
+      { 'value' : "y", 'type' : "string" },
+      { 'value' : "$", 'type' : "special_char" }
+    ]);
+  });
+  it("should tokenize a regex containing a basic character class", function() {
+    expect(tokenizer.tokenize("a[bc]d")).toEqualTokenList([
+      { 'value' : "a", 'type' : "string" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "b", 'type' : "string" },
+      { 'value' : "c", 'type' : "string" },
+      { 'value' : "]", 'type' : "close_bracket" },
+      { 'value' : "d", 'type' : "string" }
+    ]);
+  });
+  it("should tokenize a regex containing unicode character sets", function() {
+    expect(tokenizer.tokenize("\\p{Zs}[:Lu:]")).toEqualTokenList([
+      { 'value' : "\\p{Zs}", 'type' : "character_set" },
+      { 'value' : "[:Lu:]",  'type' : "character_set" }
+    ]);
+  });
+  it("should tokenize a regex containing escaped characters", function() {
+    expect(tokenizer.tokenize("^[a\\b]\\$")).toEqualTokenList([
+      { 'value' : "^", 'type' : "negate" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "a", 'type' : "string" },
+      { 'value' : "\\b", 'type' : "escaped_character" },
+      { 'value' : "]", 'type' : "close_bracket" },
+      { 'value' : "\\$", 'type' : "escaped_character" }
+    ]);
+  });
+  it("should tokenize a regex containing basic character ranges", function() {
+    expect(tokenizer.tokenize("[a-z0-9]|[ab]")).toEqualTokenList([
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "a", 'type' : "string" },
+      { 'value' : "-", 'type' : "dash" },
+      { 'value' : "z", 'type' : "string" },
+      { 'value' : "0", 'type' : "string" },
+      { 'value' : "-", 'type' : "dash" },
+      { 'value' : "9", 'type' : "string" },
+      { 'value' : "]", 'type' : "close_bracket" },
+      { 'value' : "|", 'type' : "pipe" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "a", 'type' : "string" },
+      { 'value' : "b", 'type' : "string" },
+      { 'value' : "]", 'type' : "close_bracket" },
+    ]);
+  });
+  it("should tokenize a regex containing escaped unicode characters", function() {
+    expect(tokenizer.tokenize("\\u0020[\\u0123-\\u0155]")).toEqualTokenList([
+      { 'value' : "\\u0020", 'type' : "unicode_char" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "\\u0123", 'type' : "unicode_char" },
+      { 'value' : "-", 'type' : "dash" },
+      { 'value' : "\\u0155", 'type' : "unicode_char" },
+      { 'value' : "]", 'type' : "close_bracket" },
+    ]);
+  });
+  it("should tokenize a regex containing variable substitutions", function() {
+    expect(tokenizer.tokenize("$CR(?:ab)[$LF]")).toEqualTokenList([
+      { 'value' : "$CR", 'type' : "variable" },
+      { 'value' : "(", 'type' : "special_char" },
+      { 'value' : "?", 'type' : "special_char" },
+      { 'value' : ":", 'type' : "special_char" },
+      { 'value' : "a", 'type' : "string" },
+      { 'value' : "b", 'type' : "string" },
+      { 'value' : ")", 'type' : "special_char" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "$LF", 'type' : "variable" },
+      { 'value' : "]", 'type' : "close_bracket" }
+    ]);
+  });
+  it("should tokenize a regex containing multichar strings", function() {
+    expect(tokenizer.tokenize("[{foo}bar]")).toEqualTokenList([
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "{foo}", 'type' : "multichar_string" },
+      { 'value' : "b", 'type' : "string" },
+      { 'value' : "a", 'type' : "string" },
+      { 'value' : "r", 'type' : "string" },
+      { 'value' : "]", 'type' : "close_bracket" }
+    ]);
+  });
+  it("should tokenize a regex containing negated character sets", function() {
+    expect(tokenizer.tokenize("[[:^N:]\\P{L}]")).toEqualTokenList([
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "[:^N:]", 'type' : "negated_character_set" },
+      { 'value' : "\\P{L}", 'type' : "negated_character_set" },
+      { 'value' : "]", 'type' : "close_bracket" }
+    ]);
+  });
+  it("should tokenize a regex containing some of everything", function() {
+    expect(tokenizer.tokenize("^[a-zb]?[^[\\p{Z}\\u0020-\\u007f]-[\\P{L}]-[[:N:]\\u0123]][:^CC:]*[{foo}]+$")).toEqualTokenList([
+      { 'value' : "^", 'type' : "negate" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "a", 'type' : "string" },
+      { 'value' : "-", 'type' : "dash" },
+      { 'value' : "z", 'type' : "string" },
+      { 'value' : "b", 'type' : "string" },
+      { 'value' : "]", 'type' : "close_bracket" },
+      { 'value' : "?", 'type' : "special_char" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "^", 'type' : "negate" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "\\p{Z}", 'type' : "character_set" },
+      { 'value' : "\\u0020", 'type' : "unicode_char" },
+      { 'value' : "-", 'type' : "dash" },
+      { 'value' : "\\u007f", 'type' : "unicode_char" },
+      { 'value' : "]", 'type' : "close_bracket" },
+      { 'value' : "-", 'type' : "dash" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "\\P{L}", 'type' : "negated_character_set" },
+      { 'value' : "]", 'type' : "close_bracket" },
+      { 'value' : "-", 'type' : "dash" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "[:N:]", 'type' : "character_set" },
+      { 'value' : "\\u0123", 'type' : "unicode_char" },
+      { 'value' : "]", 'type' : "close_bracket" },
+      { 'value' : "]", 'type' : "close_bracket" },
+      { 'value' : "[:^CC:]", 'type' : "negated_character_set" },
+      { 'value' : "*", 'type' : "special_char" },
+      { 'value' : "[", 'type' : "open_bracket" },
+      { 'value' : "{foo}", 'type' : "multichar_string" },
+      { 'value' : "]", 'type' : "close_bracket" },
+      { 'value' : "+", 'type' : "special_char" },
+      { 'value' : "$", 'type' : "special_char" }
+    ]);
+  });
+});