twitter_cldr_js 2.3.2 → 2.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -5
- data/History.txt +8 -0
- data/README.md +69 -1
- data/Rakefile +0 -9
- data/lib/assets/javascripts/twitter_cldr/af.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ar.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/be.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/bg.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/bn.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ca.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/cs.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/cy.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/da.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/de-CH.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/de.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/el.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-150.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-AU.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-CA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-GB.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-IE.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-SG.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-ZA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-419.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-CO.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-MX.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/es-US.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/es.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/eu.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fa.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fil.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/fr-BE.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fr-CA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fr-CH.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/fr.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ga.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/gl.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/he.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/hi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/hr.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/hu.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/id.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/is.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/it-CH.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/it.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/ja.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ko.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/lv.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/msa.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/nl.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/no.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/pl.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/pt.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ro.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/ru.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sk.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sq.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/sr.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sv.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ta.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/th.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/tr.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/uk.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/ur.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/vi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/zh-cn.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/zh-tw.js +2042 -142
- data/lib/twitter_cldr/js/compiler.rb +26 -5
- data/lib/twitter_cldr/js/mustache/calendars/datetime.coffee +1 -4
- data/lib/twitter_cldr/js/mustache/numbers/numbers.coffee +10 -4
- data/lib/twitter_cldr/js/mustache/parsers/parser.coffee +32 -0
- data/lib/twitter_cldr/js/mustache/parsers/segmentation_parser.coffee +89 -0
- data/lib/twitter_cldr/js/mustache/parsers/symbol_table.coffee +14 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_class.coffee +51 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_range.coffee +19 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_set.coffee +36 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/component.coffee +48 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/literal.coffee +44 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/unicode_string.coffee +23 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex_parser.coffee +189 -0
- data/lib/twitter_cldr/js/mustache/plurals/rules.coffee +7 -5
- data/lib/twitter_cldr/js/mustache/shared/break_iterator.coffee +148 -0
- data/lib/twitter_cldr/js/mustache/shared/code_point.coffee +121 -0
- data/lib/twitter_cldr/js/mustache/shared/unicode_regex.coffee +41 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/composite_token.coffee +11 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/segmentation_tokenizer.coffee +24 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/token.coffee +14 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/tokenizer.coffee +83 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/unicode_regex/unicode_regex_tokenizer.coffee +39 -0
- data/lib/twitter_cldr/js/mustache/utilities.coffee +45 -0
- data/lib/twitter_cldr/js/mustache/utils/code_points.coffee +23 -0
- data/lib/twitter_cldr/js/mustache/utils/range.coffee +16 -0
- data/lib/twitter_cldr/js/mustache/utils/range_set.coffee +195 -0
- data/lib/twitter_cldr/js/renderers.rb +39 -10
- data/lib/twitter_cldr/js/renderers/calendars/timespan_renderer.rb +1 -1
- data/lib/twitter_cldr/js/renderers/numbers/numbers_renderer.rb +16 -9
- data/lib/twitter_cldr/js/renderers/parsers/parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/segmentation_parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/symbol_table.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_class.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_range.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_set.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/component.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/literal.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/unicode_string.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex_parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_renderer.rb +27 -28
- data/lib/twitter_cldr/js/renderers/shared/break_iterator_renderer.rb +50 -0
- data/lib/twitter_cldr/js/renderers/shared/code_point_renderer.rb +103 -0
- data/lib/twitter_cldr/js/renderers/shared/unicode_regex_renderer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/composite_token.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/segmentation_tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/token.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/unicode_regex/unicode_regex_tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/code_points.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/range.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/range_set.rb +18 -0
- data/lib/twitter_cldr/js/tasks/tasks.rb +1 -1
- data/lib/twitter_cldr/js/version.rb +1 -1
- data/spec/js/calendars/datetime.ru.spec.js +17 -0
- data/spec/js/calendars/timespan.ru.spec.js +20 -0
- data/spec/js/numbers/abbreviated/abbreviated_number.spec.js +5 -5
- data/spec/js/numbers/abbreviated/long_decimal.ru.spec.js +24 -0
- data/spec/js/numbers/currency.spec.js +1 -1
- data/spec/js/parsers/parser.spec.js +74 -0
- data/spec/js/parsers/segmentation_parser.spec.js +67 -0
- data/spec/js/parsers/symbol_table.spec.js +20 -0
- data/spec/js/parsers/unicode_regex/character_class.spec.js +121 -0
- data/spec/js/parsers/unicode_regex/character_range.spec.js +17 -0
- data/spec/js/parsers/unicode_regex/character_set.spec.js +17 -0
- data/spec/js/parsers/unicode_regex/literal.spec.js +30 -0
- data/spec/js/parsers/unicode_regex/unicode_string.spec.js +17 -0
- data/spec/js/parsers/unicode_regex_parser.spec.js +76 -0
- data/spec/js/plurals/plural_rules.spec.js +21 -0
- data/spec/js/shared/break_iterator.spec.js +68 -0
- data/spec/js/shared/code_point.spec.js +89 -0
- data/spec/js/shared/unicode_regex.spec.js +201 -0
- data/spec/js/tokenizers/composite_token.spec.js +28 -0
- data/spec/js/tokenizers/segmentation_tokenizer.spec.js +22 -0
- data/spec/js/tokenizers/token.spec.js +25 -0
- data/spec/js/tokenizers/unicode_regex/unicode_regex_tokenizer.spec.js +163 -0
- data/spec/js/utilities.spec.js +47 -0
- data/spec/js/utils/code_points.spec.js +49 -0
- data/spec/js/utils/range_set.spec.js +248 -0
- data/twitter_cldr_js.gemspec +8 -6
- metadata +128 -34
- data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_compiler.rb +0 -93
- data/spec/ruby/renderers/plurals/plural_rules_compiler_spec.rb +0 -56
- data/spec/ruby/spec_helper.rb +0 -11
@@ -0,0 +1,41 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# Wraps a parsed Unicode regular-expression pattern and converts it into a
# native JavaScript RegExp.
class TwitterCldr.UnicodeRegex
  # elements  - parsed pattern components; each must respond to to_regexp_str()
  # modifiers - flag string handed to the RegExp constructor (e.g. "g")
  constructor : (@elements, @modifiers = "") ->

  # Tokenizes and parses `str` and wraps the result in a new UnicodeRegex.
  #
  # str          - the pattern source text
  # modifiers    - flag string for the resulting RegExp
  # symbol_table - passed through to the parser as the "symbol_table" option
  @compile : (str, modifiers = "", symbol_table = null) ->
    tokens = @get_tokenizer().tokenize(str)
    elements = @get_parser().parse(tokens, {"symbol_table" : symbol_table})
    new TwitterCldr.UnicodeRegex(elements, modifiers)

  # RangeSet covering 0..0xFFFF (built once, then cached on the class).
  @get_all_unicode : ->
    @all_unicode ||= new TwitterCldr.RangeSet([new TwitterCldr.Range(0, 0xFFFF)])

  # A few <control> characters (i.e. 2..7) and the surrogate code points
  # (i.e. 55296..57343). These don't play nicely with regular expression
  # engines, and can safely be disregarded.
  @get_invalid_regexp_chars : ->
    @invalid_regexp_chars ||= new TwitterCldr.RangeSet([
      new TwitterCldr.Range(2, 7),
      new TwitterCldr.Range(55296, 57343)
    ])

  # Everything in get_all_unicode() minus get_invalid_regexp_chars().
  @get_valid_regexp_chars : ->
    @valid_regexp_chars ||= @get_all_unicode().subtract(@get_invalid_regexp_chars())

  # RangeSet covering 0x10000..0x10FFFF.
  @get_unsupported_chars : ->
    @unsupported_chars ||= new TwitterCldr.RangeSet([new TwitterCldr.Range(0x10000, 0x10FFFF)])

  # Shared tokenizer instance. Memoized so that compile() does not rebuild
  # the tokenizer (and all of its recognizers) on every call.
  @get_tokenizer : ->
    @tokenizer ||= new TwitterCldr.UnicodeRegexTokenizer()

  # Shared parser instance, memoized for the same reason as the tokenizer.
  # NOTE(review): assumes parse() resets any per-parse state - confirm
  # against TwitterCldr.UnicodeRegexParser.
  @get_parser : ->
    @parser ||= new TwitterCldr.UnicodeRegexParser()

  # Concatenates the regexp source of every element; the result is cached.
  to_regexp_str : ->
    @regexp_str ||= (element.to_regexp_str() for element in @elements).join("")

  # Builds a fresh RegExp from the cached source and the modifiers.
  to_regexp : ->
    new RegExp(@to_regexp_str(), @modifiers)

  # Runs String::match on `str` with the compiled expression.
  match : (str) ->
    str.match(@to_regexp())
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# A token of type "composite" that wraps a list of child tokens.
class TwitterCldr.CompositeToken
  # tokens - array of child tokens, each responding to to_string()
  constructor : (@tokens) ->
    @type = "composite"

  # Joins the string form of every child token.
  # Returns null when no token list was supplied.
  to_string : ->
    return null unless @tokens?

    pieces = []
    for child in @tokens
      pieces.push(child.to_string())
    pieces.join("")
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# Tokenizer for segmentation rules: a UnicodeRegexTokenizer extended with
# recognizers for the break and no-break markers.
class TwitterCldr.SegmentationTokenizer
  constructor : ->
    trim = (val) -> TwitterCldr.Utilities.trim_string(val)

    break_recognizer = new TwitterCldr.TokenRecognizer("break", /\u00f7/, trim) # ÷ character
    no_break_recognizer = new TwitterCldr.TokenRecognizer("no_break", /\u00d7/, trim) # × character

    # The extra recognizers are placed ahead of the "string" recognizer.
    @tokenizer = new TwitterCldr.UnicodeRegexTokenizer
    @tokenizer.insert_before("string", [break_recognizer, no_break_recognizer])

  # Tokenizes `pattern` and drops every token whose value is empty once
  # surrounding whitespace has been trimmed.
  tokenize : (pattern) ->
    kept = []
    for candidate in @tokenizer.tokenize(pattern)
      trimmed = TwitterCldr.Utilities.trim_string(candidate.value)
      kept.push(candidate) unless trimmed.length is 0
    kept
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# A single token. Every entry of the options object is copied onto the
# instance; `value` and `type` are the entries read by the methods below.
class TwitterCldr.Token

  # options - object whose properties become properties of the token
  constructor : (options = {}) ->
    for name of options
      @[name] = options[name]

  # Returns an object holding the token's value and type.
  to_hash : ->
    hash = {}
    hash["value"] = @value
    hash["type"] = @type
    hash

  # Returns the token's value.
  to_string : ->
    @value
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# Pairs a token type with the regular expression that recognizes it and an
# optional clean-up function applied to matched text.
class TwitterCldr.TokenRecognizer
  # token_type - name given to tokens produced by this recognizer
  # regex      - RegExp used to test candidate text
  # cleaner    - optional function applied to matched text by clean()
  # content    - optional pattern stored on the recognizer (defaults to null)
  constructor : (@token_type, @regex, @cleaner, @content = null) ->

  # True when `text` matches this recognizer's regex.
  recognizes : (text) ->
    @regex.test(text)

  # Runs the cleaner over `val`; without a cleaner `val` is returned as is.
  clean : (val) ->
    return val unless @cleaner?
    @cleaner(val)
|
12
|
+
|
13
|
+
# Splits text into tokens using an ordered list of TokenRecognizers.
class TwitterCldr.Tokenizer
  # recognizers          - ordered array of TokenRecognizer objects
  # custom_splitter      - optional RegExp used instead of the one derived
  #                        from the recognizers
  # remove_empty_entries - when true, tokens whose cleaned text is empty are
  #                        dropped by tokenize()
  constructor : (@recognizers, @custom_splitter = null, @remove_empty_entries = true) ->
    @get_splitter()

  # Builds one tokenizer out of several.
  #
  # tokenizers - array of Tokenizer objects
  # block      - optional predicate; when given, only recognizers for which
  #              it returns a truthy value are kept
  #
  # The custom splitters are combined (joined with "|") only when every
  # tokenizer has one; otherwise the new tokenizer derives its splitter from
  # its recognizers.
  @union : (tokenizers, block) ->
    recognizers = []
    for tokenizer in tokenizers
      for recognizer in tokenizer.recognizers
        recognizers.push(recognizer) if !block? or block(recognizer)

    all_custom = tokenizers.length > 0
    for tokenizer in tokenizers
      all_custom = false unless tokenizer.custom_splitter?

    splitter = null
    if all_custom
      sources = (tokenizer.custom_splitter.source for tokenizer in tokenizers)
      splitter = new RegExp(sources.join("|"))

    new TwitterCldr.Tokenizer(recognizers, splitter)

  # Returns an array of all recognizers whose token_type equals `token_type`.
  recognizer_at : (token_type) ->
    recognizer for recognizer in @recognizers when recognizer.token_type is token_type

  # Inserts `new_recognizers` (an array) in front of the last recognizer
  # whose type is `token_type`, or at the front when there is none, and
  # discards the current splitter. Always returns null.
  insert_before : (token_type, new_recognizers) ->
    idx = 0
    for recognizer, i in @recognizers
      idx = i if recognizer.token_type is token_type

    for recognizer in new_recognizers
      @recognizers.splice(idx, 0, recognizer)
      idx += 1

    @clear_splitter()
    null

  # Splits `text` with the splitter and converts every piece into a Token
  # (or a CompositeToken for recognizers of type "composite").
  #
  # Returns an array of tokens; an empty array when nothing in `text`
  # matches the splitter.
  # Throws an Error when a piece is not matched by any recognizer.
  tokenize : (text) ->
    # String::match returns null (not an empty array) when nothing matches.
    pieces = text.match(new RegExp(@get_splitter().source, "g")) or []
    result = []

    for piece in pieces
      recognizer = null
      for r in @recognizers
        if r.recognizes(piece)
          recognizer = r
          break

      unless recognizer?
        throw new Error("No recognizer matches token text '" + piece + "'")

      if recognizer.token_type is "composite"
        # NOTE(review): index 0 is the whole match of recognizer.content -
        # confirm a capture group was not intended here.
        content = piece.match(recognizer.content)[0]
        result.push(new TwitterCldr.CompositeToken(@tokenize(content)))
      else
        cleaned_text = recognizer.clean(piece)
        if !@remove_empty_entries or cleaned_text.length > 0
          result.push(new TwitterCldr.Token({"value" : cleaned_text, "type" : recognizer.token_type}))

    result

  # Forgets the current splitter.
  clear_splitter : ->
    @splitter = null

  # Rebuilds, stores and returns the splitter: the custom splitter when one
  # was supplied, otherwise a single capturing group that alternates the
  # sources of all recognizer regexes in order.
  get_splitter : ->
    @splitter = (@custom_splitter || new RegExp("(" + (recognizer.regex.source for recognizer in @recognizers).join("|") + ")"))
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# Tokenizer for Unicode regular-expression patterns. It is a thin wrapper
# around a TwitterCldr.Tokenizer configured with the recognizers below; their
# order matters because the first recognizer that matches a piece wins.
class TwitterCldr.UnicodeRegexTokenizer
  constructor : ->
    Recognizer = TwitterCldr.TokenRecognizer

    # A lone space is kept verbatim; any other text is trimmed.
    clean_string = (val) ->
      return val if val is " "
      TwitterCldr.Utilities.trim_string(val)

    @tokenizer = new TwitterCldr.Tokenizer([
      # "$" followed by one or more word characters
      new Recognizer("variable", /\$\w[\w\d]*/),
      new Recognizer("character_set", /\[:\w+:\]|\\p\{[\w=]+\}/), # [:Lu:] or \p{Lu} or \p{Sentence_Break=CF}
      new Recognizer("negated_character_set", /\[:\^\w+:\]|\\P\{[\w=]+\}/), # [:^Lu:] or \P{Lu}
      new Recognizer("unicode_char", /\\u\{?[a-fA-F0-9]{1,6}\}?/),
      new Recognizer("multichar_string", /\{\w+\}/),

      new Recognizer("escaped_character", /\\./),
      new Recognizer("negate", /\^/),
      new Recognizer("ampersand", /&/),
      new Recognizer("pipe", /\|/),
      new Recognizer("dash", /-/),

      # stuff that shouldn't be converted to codepoints
      new Recognizer("special_char", /\{\d,?\d?\}|[$?:{}()*+\.,\/\\]/),

      new Recognizer("open_bracket", /\[/),
      new Recognizer("close_bracket", /\]/),

      new Recognizer("string", /[\s\S]/, clean_string)
    ])

  # Delegates to the wrapped tokenizer's insert_before.
  insert_before : (token_type, new_recognizers) ->
    @tokenizer.insert_before(token_type, new_recognizers)

  # Delegates to the wrapped tokenizer's tokenize.
  tokenize : (pattern) ->
    @tokenizer.tokenize(pattern)
|
@@ -85,3 +85,48 @@ class TwitterCldr.Utilities
|
|
85
85
|
|
86
86
|
# Returns true when `num` is an odd integer (negative values included).
@is_odd: (num) ->
  # The sign of `%` follows the dividend in JavaScript (-3 % 2 is -1), so
  # compare the magnitude of the remainder instead of the remainder itself.
  Math.abs(num % 2) is 1
|
88
|
+
|
89
|
+
# Returns a new array holding the elements of `arr` in their original order,
# keeping only the first occurrence of each (compared with indexOf).
@remove_duplicates : (arr) ->
  unique = []
  arr.forEach (elem) ->
    unique.push(elem) if unique.indexOf(elem) is -1
  unique
|
94
|
+
|
95
|
+
# This function was adapted from the Mozilla JS reference:
|
96
|
+
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
|
97
|
+
# Returns `s` with a backslash placed in front of every character that has
# a special meaning inside a regular expression.
@regex_escape : (s) ->
  special_chars = /([.*+?^=!:${}()|\[\]\/\\])/g
  s.replace(special_chars, "\\$1")
|
99
|
+
|
100
|
+
# This function was adapted from the Mozilla JS reference:
|
101
|
+
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/Trim
|
102
|
+
# Returns `s` without its leading and trailing whitespace.
@trim_string : (s) ->
  edge_whitespace = /^\s+|\s+$/g
  s.replace(edge_whitespace, '')
|
104
|
+
|
105
|
+
# This function was adapted from the CoffeeScript Cookbook Reference:
|
106
|
+
# http://coffeescriptcookbook.com/chapters/classes_and_objects/cloning
|
107
|
+
# Returns a deep copy of `obj`.
#
# Non-objects (and null/undefined) are returned unchanged. Dates and RegExps
# are rebuilt from their time value / source and flags. Any other object is
# created with its own constructor (called without arguments) and every
# enumerable property is cloned recursively.
@clone : (obj) ->
  return obj if not obj? or typeof obj isnt 'object'

  return new Date(obj.getTime()) if obj instanceof Date

  if obj instanceof RegExp
    # The flag properties are booleans, so they must be tested for
    # truthiness; an existence check (`?`) is also true for `false` and
    # would switch on every flag in the copy.
    flags = ''
    flags += 'g' if obj.global
    flags += 'i' if obj.ignoreCase
    flags += 'm' if obj.multiline
    flags += 'y' if obj.sticky
    return new RegExp(obj.source, flags)

  newInstance = new obj.constructor()

  for key of obj
    newInstance[key] = @clone obj[key]

  newInstance
|
128
|
+
|
129
|
+
# Joins `pieces` with "|" to form a cache key.
# Returns null when `pieces` is missing or empty.
@compute_cache_key : (pieces) ->
  return null unless pieces? and pieces.length > 0
  pieces.join("|")
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# Conversions between characters/strings and code points, built on
# TwitterCldr.Utilities.pack_array and TwitterCldr.Utilities.unpack_string.
class TwitterCldr.CodePoints

  # Packs a single code point.
  @to_char : (code_point) ->
    TwitterCldr.Utilities.pack_array([code_point])

  # Unpacks the first element of `char` and returns the first value produced.
  @from_char : (char) ->
    leading = char[0]
    unpacked = TwitterCldr.Utilities.unpack_string(leading)
    unpacked[0]

  # Applies from_char to every entry of `chars`.
  @from_chars : (chars) ->
    converted = []
    for char in chars
      converted.push(@from_char(char))
    converted

  # Applies to_char to every entry of `code_points`.
  @to_chars : (code_points) ->
    converted = []
    for code_point in code_points
      converted.push(@to_char(code_point))
    converted

  # Unpacks a whole string.
  @from_string : (str) ->
    TwitterCldr.Utilities.unpack_string(str)

  # Converts every code point with to_char and joins the results.
  @to_string : (code_points) ->
    @to_chars(code_points).join("")
|
23
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# An inclusive range running from `first` to `last`.
class TwitterCldr.Range
  # `size` is only set when neither bound is an Array.
  constructor : (@first, @last) ->
    @size = @last - @first + 1 if @is_numeric()

  # Expands the range into an array of every value from first to last.
  to_array : ->
    [@first..@last]

  # True when `number` lies between first and last, bounds included.
  includes : (number) ->
    @first <= number <= @last

  # True when neither bound is an Array.
  is_numeric : ->
    not ((@first instanceof Array) or (@last instanceof Array))
|
@@ -0,0 +1,195 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
class TwitterCldr.RangeSet
  # An integer set, implemented under the hood with ranges. The idea is
  # that it's more efficient to store sequential data in ranges rather
  # than as single elements. By definition, RangeSets contain no duplicates.
  #
  # ranges - array whose entries are either TwitterCldr.Range objects or
  #          single values (each stored as a range from the value to itself)
  constructor : (ranges) ->
    @ranges = []
    for range in ranges
      if range instanceof TwitterCldr.Range
        @ranges.push(new TwitterCldr.Range(range.first, range.last))
      else
        @ranges.push(new TwitterCldr.Range(range, range))
    @flatten()

  # Builds a RangeSet from a plain array of integers.
  @from_array : (array, compress = false) ->
    new TwitterCldr.RangeSet @rangify(array, compress)

  # Turns an array of integers into ranges. The "compress" option indicates
  # whether or not to turn isolated elements into zero-length ranges or leave
  # them as single elements.
  #
  # For example:
  # rangify([1, 2, 4], false) returns [1..2, 4..4]
  # rangify([1, 2, 4], true) returns [1..2, 4]
  @rangify : (list, compress = false) ->
    last_item = null

    # Sort a copy so that the caller's array is left untouched.
    sorted_list = list.slice().sort((a, b) -> a - b)

    sub_lists = []
    for item in sorted_list
      if last_item?
        diff = item - last_item

        # diff is 0 for duplicates, which are skipped
        if diff > 0
          if diff is 1
            sub_lists[sub_lists.length - 1].push(item)
          else
            sub_lists.push([item])
          last_item = item
      else
        sub_lists.push([item])
        last_item = item

    result = []
    for sub_list in sub_lists
      if compress and sub_list.length is 1
        result.push(sub_list[0])
      else
        result.push(new TwitterCldr.Range(sub_list[0], sub_list[sub_list.length - 1]))
    result

  # Returns a copy of the ranges. With `compress`, ranges holding a single
  # value are returned as that value instead of a Range.
  to_array : (compress = false) ->
    if compress
      result = []
      for range in @ranges
        if range.first is range.last
          result.push(range.first)
        else
          result.push(TwitterCldr.Utilities.clone(range))
      result
    else
      TwitterCldr.Utilities.clone(@ranges)

  # Expands every range and returns all contained values in one array.
  to_full_array : ->
    result = []

    for range in @ranges
      result = result.concat(range.to_array())

    result

  # True when `obj` (a Range or a single value) lies entirely inside one of
  # the ranges of this set.
  includes : (obj) ->
    if obj instanceof TwitterCldr.Range
      for range in @ranges
        return true if range.first <= obj.first and range.last >= obj.last
    else
      for range in @ranges
        return true if range.includes(obj)
    false

  is_empty : ->
    @ranges.length == 0

  # Returns a new set holding the ranges of both sets.
  union : (range_set) ->
    new TwitterCldr.RangeSet @ranges.concat(range_set.ranges)

  # Returns a new set holding only the parts present in both sets.
  intersection : (range_set) ->
    new_ranges = []
    for their_range in range_set.ranges
      for our_range in @ranges
        if @does_overlap(their_range, our_range)
          if intrsc = @find_intersection(their_range, our_range)
            new_ranges.push(intrsc)

    new TwitterCldr.RangeSet new_ranges

  # Returns a new set holding this set minus everything in `range_set`.
  subtract : (range_set) ->
    return (new TwitterCldr.RangeSet(@ranges)) if range_set.is_empty()
    remaining = TwitterCldr.Utilities.clone(range_set.ranges)
    current_ranges = TwitterCldr.Utilities.clone(@ranges)

    # Remove their ranges one at a time from what is left of ours.
    while their_range = remaining.shift()
      new_ranges = []

      for our_range in current_ranges
        if @does_overlap(their_range, our_range)
          new_ranges = new_ranges.concat(@find_subtraction(their_range, our_range))
        else
          new_ranges.push(our_range)

      current_ranges = new_ranges

    new TwitterCldr.RangeSet(current_ranges)

  # symmetric difference (the union without the intersection)
  # http://en.wikipedia.org/wiki/Symmetric_difference
  difference : (range_set) ->
    @union(range_set).subtract(@intersection(range_set))

  # Sorts the ranges by their first value and merges ranges that overlap or
  # are adjacent, replacing @ranges with the merged list.
  flatten : ->
    if @ranges.length <= 1
      return

    sorted_ranges = @ranges.sort ((a, b) ->
      if ((!a.is_numeric()) and (!b.is_numeric()))
        return 1
      if a.first > b.first
        return 1
      else if a.first < b.first
        return -1
      else
        return 0
    )

    new_ranges = [sorted_ranges[0]]
    for range in sorted_ranges
      previous_range = new_ranges.pop()

      if (@are_adjacent(previous_range, range)) or (@does_overlap(previous_range, range))
        merged_first = TwitterCldr.Utilities.min([range.first, previous_range.first])
        merged_last = TwitterCldr.Utilities.max([range.last, previous_range.last])
        new_ranges.push(new TwitterCldr.Range(merged_first, merged_last))
      else
        new_ranges.push(previous_range)
        new_ranges.push(range)

    @ranges = new_ranges

  # returns true if range2 starts directly after the end of range1
  are_adjacent : (range1, range2) ->
    range1.is_numeric() and range2.is_numeric() and
    ((range1.last is range2.first - 1) or (range2.first is range1.last + 1))

  # returns true if the two (numeric) ranges share at least one value
  does_overlap : (range1, range2) ->
    range1.is_numeric() and range2.is_numeric() and
    (
      (range1.last >= range2.first and range1.last <= range2.last) or
      (range1.first >= range2.first and range1.first <= range2.last) or
      (range1.first <= range2.first and range1.last >= range2.last)
    )

  # Returns the range shared by range1 and range2, or undefined when none
  # of the cases below applies.
  find_intersection : (range1, range2) ->
    # range2 entirely contains range1
    if range2.first <= range1.first and range1.last <= range2.last
      TwitterCldr.Utilities.clone(range1)
    else if range1.last >= range2.first and range1.last <= range2.last
      new TwitterCldr.Range(range2.first, range1.last)
    else if range1.first >= range2.first and range1.first <= range2.last
      new TwitterCldr.Range(range1.first, range2.last)
    else if range1.first <= range2.first and range1.last >= range2.last
      new TwitterCldr.Range(TwitterCldr.Utilities.max([range1.first, range2.first]), TwitterCldr.Utilities.min([range1.last, range2.last]))

  # subtracts range1 from range2 (range2 - range1)
  # Returns an array of zero, one or two ranges.
  find_subtraction : (range1, range2) ->
    # case: range1 contains range2 entirely (also handles equal case)
    if range1.first <= range2.first and range2.last <= range1.last
      results = []
    # case: range1 comes in the middle
    else if range2.first <= range1.first and range2.last >= range1.last
      results = [(new TwitterCldr.Range(range2.first, range1.first - 1)), (new TwitterCldr.Range(range1.last + 1, range2.last))]
    # case: range1 trails
    else if range2.last >= range1.first and range1.last >= range2.last
      results = [new TwitterCldr.Range(range2.first, range1.first - 1)]
    # case: range1 leads
    else if range1.last >= range2.first and range1.first <= range2.first
      results = [new TwitterCldr.Range(range1.last + 1, range2.last)]
    # case: the ranges do not overlap, so nothing is removed from range2
    else
      results = [new TwitterCldr.Range(range2.first, range2.last)]

    # drop empty leftovers (first > last)
    filtered_results = []
    for range in results
      if range.first <= range.last
        filtered_results.push(range)

    filtered_results
|