twitter_cldr_js 2.3.2 → 2.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -5
- data/History.txt +8 -0
- data/README.md +69 -1
- data/Rakefile +0 -9
- data/lib/assets/javascripts/twitter_cldr/af.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ar.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/be.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/bg.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/bn.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ca.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/cs.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/cy.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/da.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/de-CH.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/de.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/el.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-150.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-AU.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-CA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-GB.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-IE.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-SG.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en-ZA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/en.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-419.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-CO.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/es-MX.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/es-US.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/es.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/eu.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fa.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fil.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/fr-BE.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fr-CA.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/fr-CH.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/fr.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ga.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/gl.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/he.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/hi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/hr.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/hu.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/id.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/is.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/it-CH.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/it.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/ja.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ko.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/lv.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/msa.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/nl.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/no.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/pl.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/pt.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ro.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/ru.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sk.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sq.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/sr.js +2043 -143
- data/lib/assets/javascripts/twitter_cldr/sv.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/ta.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/th.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/tr.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/uk.js +2044 -144
- data/lib/assets/javascripts/twitter_cldr/ur.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/vi.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/zh-cn.js +2042 -142
- data/lib/assets/javascripts/twitter_cldr/zh-tw.js +2042 -142
- data/lib/twitter_cldr/js/compiler.rb +26 -5
- data/lib/twitter_cldr/js/mustache/calendars/datetime.coffee +1 -4
- data/lib/twitter_cldr/js/mustache/numbers/numbers.coffee +10 -4
- data/lib/twitter_cldr/js/mustache/parsers/parser.coffee +32 -0
- data/lib/twitter_cldr/js/mustache/parsers/segmentation_parser.coffee +89 -0
- data/lib/twitter_cldr/js/mustache/parsers/symbol_table.coffee +14 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_class.coffee +51 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_range.coffee +19 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_set.coffee +36 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/component.coffee +48 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/literal.coffee +44 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/unicode_string.coffee +23 -0
- data/lib/twitter_cldr/js/mustache/parsers/unicode_regex_parser.coffee +189 -0
- data/lib/twitter_cldr/js/mustache/plurals/rules.coffee +7 -5
- data/lib/twitter_cldr/js/mustache/shared/break_iterator.coffee +148 -0
- data/lib/twitter_cldr/js/mustache/shared/code_point.coffee +121 -0
- data/lib/twitter_cldr/js/mustache/shared/unicode_regex.coffee +41 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/composite_token.coffee +11 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/segmentation_tokenizer.coffee +24 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/token.coffee +14 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/tokenizer.coffee +83 -0
- data/lib/twitter_cldr/js/mustache/tokenizers/unicode_regex/unicode_regex_tokenizer.coffee +39 -0
- data/lib/twitter_cldr/js/mustache/utilities.coffee +45 -0
- data/lib/twitter_cldr/js/mustache/utils/code_points.coffee +23 -0
- data/lib/twitter_cldr/js/mustache/utils/range.coffee +16 -0
- data/lib/twitter_cldr/js/mustache/utils/range_set.coffee +195 -0
- data/lib/twitter_cldr/js/renderers.rb +39 -10
- data/lib/twitter_cldr/js/renderers/calendars/timespan_renderer.rb +1 -1
- data/lib/twitter_cldr/js/renderers/numbers/numbers_renderer.rb +16 -9
- data/lib/twitter_cldr/js/renderers/parsers/parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/segmentation_parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/symbol_table.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_class.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_range.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_set.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/component.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/literal.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/unicode_string.rb +18 -0
- data/lib/twitter_cldr/js/renderers/parsers/unicode_regex_parser.rb +18 -0
- data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_renderer.rb +27 -28
- data/lib/twitter_cldr/js/renderers/shared/break_iterator_renderer.rb +50 -0
- data/lib/twitter_cldr/js/renderers/shared/code_point_renderer.rb +103 -0
- data/lib/twitter_cldr/js/renderers/shared/unicode_regex_renderer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/composite_token.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/segmentation_tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/token.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/tokenizers/unicode_regex/unicode_regex_tokenizer.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/code_points.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/range.rb +18 -0
- data/lib/twitter_cldr/js/renderers/utils/range_set.rb +18 -0
- data/lib/twitter_cldr/js/tasks/tasks.rb +1 -1
- data/lib/twitter_cldr/js/version.rb +1 -1
- data/spec/js/calendars/datetime.ru.spec.js +17 -0
- data/spec/js/calendars/timespan.ru.spec.js +20 -0
- data/spec/js/numbers/abbreviated/abbreviated_number.spec.js +5 -5
- data/spec/js/numbers/abbreviated/long_decimal.ru.spec.js +24 -0
- data/spec/js/numbers/currency.spec.js +1 -1
- data/spec/js/parsers/parser.spec.js +74 -0
- data/spec/js/parsers/segmentation_parser.spec.js +67 -0
- data/spec/js/parsers/symbol_table.spec.js +20 -0
- data/spec/js/parsers/unicode_regex/character_class.spec.js +121 -0
- data/spec/js/parsers/unicode_regex/character_range.spec.js +17 -0
- data/spec/js/parsers/unicode_regex/character_set.spec.js +17 -0
- data/spec/js/parsers/unicode_regex/literal.spec.js +30 -0
- data/spec/js/parsers/unicode_regex/unicode_string.spec.js +17 -0
- data/spec/js/parsers/unicode_regex_parser.spec.js +76 -0
- data/spec/js/plurals/plural_rules.spec.js +21 -0
- data/spec/js/shared/break_iterator.spec.js +68 -0
- data/spec/js/shared/code_point.spec.js +89 -0
- data/spec/js/shared/unicode_regex.spec.js +201 -0
- data/spec/js/tokenizers/composite_token.spec.js +28 -0
- data/spec/js/tokenizers/segmentation_tokenizer.spec.js +22 -0
- data/spec/js/tokenizers/token.spec.js +25 -0
- data/spec/js/tokenizers/unicode_regex/unicode_regex_tokenizer.spec.js +163 -0
- data/spec/js/utilities.spec.js +47 -0
- data/spec/js/utils/code_points.spec.js +49 -0
- data/spec/js/utils/range_set.spec.js +248 -0
- data/twitter_cldr_js.gemspec +8 -6
- metadata +128 -34
- data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_compiler.rb +0 -93
- data/spec/ruby/renderers/plurals/plural_rules_compiler_spec.rb +0 -56
- data/spec/ruby/spec_helper.rb +0 -11
@@ -16,7 +16,7 @@ module TwitterCldr
|
|
16
16
|
|
17
17
|
def initialize(options = {})
|
18
18
|
@locales = options[:locales] || TwitterCldr.supported_locales
|
19
|
-
@features = options[:features] ||
|
19
|
+
@features = options[:features] || implementation_renderers.keys
|
20
20
|
@prerender = options[:prerender].nil? ? true : options[:prerender]
|
21
21
|
@source_map = options[:source_map]
|
22
22
|
end
|
@@ -28,7 +28,7 @@ module TwitterCldr
|
|
28
28
|
contents = ""
|
29
29
|
|
30
30
|
@features.each do |feature|
|
31
|
-
renderer_const =
|
31
|
+
renderer_const = implementation_renderers[feature]
|
32
32
|
contents << renderer_const.new(:locale => locale, :prerender => @prerender).render if renderer_const
|
33
33
|
end
|
34
34
|
|
@@ -58,8 +58,8 @@ module TwitterCldr
|
|
58
58
|
|
59
59
|
private
|
60
60
|
|
61
|
-
def
|
62
|
-
@
|
61
|
+
def implementation_renderers
|
62
|
+
@implementation_renderers ||= {
|
63
63
|
:plural_rules => TwitterCldr::Js::Renderers::PluralRules::PluralRulesRenderer,
|
64
64
|
:timespan => TwitterCldr::Js::Renderers::Calendars::TimespanRenderer,
|
65
65
|
:datetime => TwitterCldr::Js::Renderers::Calendars::DateTimeRenderer,
|
@@ -69,11 +69,32 @@ module TwitterCldr
|
|
69
69
|
:lists => TwitterCldr::Js::Renderers::Shared::ListRenderer,
|
70
70
|
:bidi => TwitterCldr::Js::Renderers::Shared::BidiRenderer,
|
71
71
|
:calendar => TwitterCldr::Js::Renderers::Shared::CalendarRenderer,
|
72
|
+
:code_point => TwitterCldr::Js::Renderers::Shared::CodePointRenderer,
|
72
73
|
:phone_codes => TwitterCldr::Js::Renderers::Shared::PhoneCodesRenderer,
|
73
74
|
:postal_codes => TwitterCldr::Js::Renderers::Shared::PostalCodesRenderer,
|
74
75
|
:languages => TwitterCldr::Js::Renderers::Shared::LanguagesRenderer,
|
75
|
-
:
|
76
|
+
:unicode_regex => TwitterCldr::Js::Renderers::Shared::UnicodeRegexRenderer,
|
77
|
+
:break_iterator => TwitterCldr::Js::Renderers::Shared::BreakIteratorRenderer,
|
76
78
|
:territories_containment => TwitterCldr::Js::Renderers::Shared::TerritoriesContainmentRenderer,
|
79
|
+
:number_parser => TwitterCldr::Js::Renderers::Parsers::NumberParser,
|
80
|
+
:component => TwitterCldr::Js::Renderers::Parsers::ComponentRenderer,
|
81
|
+
:literal => TwitterCldr::Js::Renderers::Parsers::LiteralRenderer,
|
82
|
+
:unicode_string => TwitterCldr::Js::Renderers::Parsers::UnicodeStringRenderer,
|
83
|
+
:character_class => TwitterCldr::Js::Renderers::Parsers::CharacterClassRenderer,
|
84
|
+
:character_range => TwitterCldr::Js::Renderers::Parsers::CharacterRangeRenderer,
|
85
|
+
:character_set => TwitterCldr::Js::Renderers::Parsers::CharacterSetRenderer,
|
86
|
+
:symbol_table => TwitterCldr::Js::Renderers::Parsers::SymbolTableRenderer,
|
87
|
+
:parser => TwitterCldr::Js::Renderers::Parsers::ParserRenderer,
|
88
|
+
:segmentation_parser => TwitterCldr::Js::Renderers::Parsers::SegmentationParserRenderer,
|
89
|
+
:unicode_regex_parser => TwitterCldr::Js::Renderers::Parsers::UnicodeRegexParserRenderer,
|
90
|
+
:token => TwitterCldr::Js::Renderers::Tokenizers::TokenRenderer,
|
91
|
+
:composite_token => TwitterCldr::Js::Renderers::Tokenizers::CompositeTokenRenderer,
|
92
|
+
:tokenizer => TwitterCldr::Js::Renderers::Tokenizers::TokenizerRenderer,
|
93
|
+
:segmentation_tokenizer => TwitterCldr::Js::Renderers::Tokenizers::SegmentationTokenizerRenderer,
|
94
|
+
:unicode_regex_tokenizer => TwitterCldr::Js::Renderers::Tokenizers::UnicodeRegexTokenizerRenderer,
|
95
|
+
:range => TwitterCldr::Js::Renderers::Utils::RangeRenderer,
|
96
|
+
:range_set => TwitterCldr::Js::Renderers::Utils::RangeSetRenderer,
|
97
|
+
:code_points => TwitterCldr::Js::Renderers::Utils::CodePointsRenderer
|
77
98
|
}
|
78
99
|
end
|
79
100
|
end
|
@@ -42,10 +42,7 @@ class TwitterCldr.DateTimeFormatter
|
|
42
42
|
when "pattern"
|
43
43
|
this.result_for_token(token, obj)
|
44
44
|
else
|
45
|
-
|
46
|
-
token.value.substring(1, token.value.length - 1)
|
47
|
-
else
|
48
|
-
token.value
|
45
|
+
token.value.replace(/'([^']+)'/g, '$1')
|
49
46
|
|
50
47
|
tokens = this.get_tokens(obj, options)
|
51
48
|
(format_token(token) for token in tokens).join("")
|
@@ -22,8 +22,14 @@ class TwitterCldr.NumberFormatter
|
|
22
22
|
tokens = this.get_tokens(number, opts)
|
23
23
|
|
24
24
|
if tokens?
|
25
|
+
if tokens not instanceof Array
|
26
|
+
tokens_sample = tokens[Object.keys(tokens)[0]]
|
27
|
+
truncated_number = @truncate_number(number, tokens_sample[1].length)
|
28
|
+
truncated_number = Math.floor(truncated_number) if opts.precision == 0
|
29
|
+
tokens = tokens[TwitterCldr.PluralRules.rule_for(truncated_number)]
|
30
|
+
|
25
31
|
[prefix, suffix, integer_format, fraction_format] = this.partition_tokens(tokens)
|
26
|
-
number = this.truncate_number(number, integer_format)
|
32
|
+
number = this.truncate_number(number, integer_format.format.length)
|
27
33
|
[intg, fraction] = this.parse_number(number, opts)
|
28
34
|
result = integer_format.apply(parseFloat(intg), opts)
|
29
35
|
result += fraction_format.apply(fraction, opts) if fraction
|
@@ -33,7 +39,7 @@ class TwitterCldr.NumberFormatter
|
|
33
39
|
# there's no specific formatting pattern for this number in current locale
|
34
40
|
number.toString()
|
35
41
|
|
36
|
-
truncate_number: (number,
|
42
|
+
truncate_number: (number, decimal_digits) ->
|
37
43
|
number # noop for base class
|
38
44
|
|
39
45
|
partition_tokens: (tokens) ->
|
@@ -149,9 +155,9 @@ class TwitterCldr.AbbreviatedNumberFormatter extends TwitterCldr.NumberFormatter
|
|
149
155
|
tokens = tokens[format] if format?
|
150
156
|
tokens
|
151
157
|
|
152
|
-
truncate_number: (number,
|
158
|
+
truncate_number: (number, decimal_digits) ->
|
153
159
|
if @NUMBER_MIN <= number and number < @NUMBER_MAX
|
154
|
-
factor = Math.max(0, Math.floor(number).toString().length -
|
160
|
+
factor = Math.max(0, Math.floor(number).toString().length - decimal_digits)
|
155
161
|
number / Math.pow(10, factor)
|
156
162
|
else
|
157
163
|
number
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# Base class for parsers that walk a flat list of tokens. It only maintains a
# cursor (@token_index) into @tokens; `do_parse` is not defined here and has
# to be supplied by a subclass.
class TwitterCldr.Parser

  # tokens - initial token list. parse() overwrites it on every call.
  constructor : (@tokens) ->
    @token_index = 0

  # Stores `tokens`, rewinds the cursor and returns whatever do_parse(options)
  # returns.
  parse : (tokens, options = {}) ->
    @tokens = tokens
    @reset()
    @do_parse(options)

  # Puts the cursor back on the first token.
  reset : ->
    @token_index = 0

  # Consumes the current token, which must be of the given type (otherwise the
  # string "Unexpected token" is thrown), then skips over any empty plaintext
  # tokens. Returns the token the cursor ends up on, which is undefined once
  # the cursor has moved past the end of the list.
  next_token : (type) ->
    if @current_token().type isnt type
      throw "Unexpected token"

    @token_index += 1
    @token_index += 1 while @current_token()? and @is_empty(@current_token())

    @current_token()

  # True for a plaintext token whose value is the empty string.
  is_empty : (token) ->
    token.type is "plaintext" and token.value is ""

  # The token under the cursor (undefined when the cursor is out of range).
  current_token : ->
    @tokens[@token_index]
|
32
|
+
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# Turns the token list of a single segmentation rule into a BreakRule or a
# NoBreakRule. The regex parts of the rule are parsed with
# TwitterCldr.UnicodeRegexParser and wrapped in TwitterCldr.UnicodeRegex.
class TwitterCldr.SegmentationParser extends TwitterCldr.Parser
  constructor : ->
    # "^" token that add_anchors places in front of every regex token list.
    @begin_token ||= new TwitterCldr.Token({type : "special_char", value: "^"})
    @regex_parser ||= new TwitterCldr.UnicodeRegexParser

  # Outcome of a successful rule match: the matched text and the offset in the
  # tested string at which the rule's boundary falls.
  class @.RuleMatchData
    constructor : (@text, @boundary_offset) ->

  # Common ancestor of BreakRule and NoBreakRule.
  class @.Rule
    constructor : ->

  # Rule with a regex on each side of the boundary.
  class @.BreakRule extends @.Rule
    # left, right - regex objects responding to match(str); either may be null.
    constructor : (@left, @right) ->
      @boundary_symbol = "break"
      super

    # Returns a RuleMatchData when the rule applies to `str`, otherwise null.
    match : (str) ->
      # Guard the call itself: the original dereferenced @left before testing
      # it, so a rule without a left side raised a TypeError instead of
      # simply not matching.
      left_match = if @left? then @left.match(str) else null

      if left_match?
        # The boundary sits right after the text matched by the left side.
        match_pos = str.indexOf(left_match[0]) + left_match[0].length

        if @right?
          # The right side is matched against what follows the boundary.
          right_match = @right.match(str.slice(match_pos))

          if right_match?
            return new TwitterCldr.SegmentationParser.RuleMatchData((left_match[0] + right_match[0]), match_pos)

        else
          # NOTE(review): this else is read as belonging to `if @right?`
          # (no right side: the whole string counts as matched) - confirm
          # against the upstream file's indentation.
          return new TwitterCldr.SegmentationParser.RuleMatchData(str, str.length)
      return null

  # Rule described by a single regex.
  class @.NoBreakRule extends @.Rule
    constructor : (@regex) ->
      @boundary_symbol = "no_break"
      super

    # Returns a RuleMatchData whose offset is the end of the matched text, or
    # null when the regex does not match.
    match : (str) ->
      match = @regex.match(str)
      if match?
        new TwitterCldr.SegmentationParser.RuleMatchData(match[0], str.indexOf(match[0]) + match[0].length)
      else
        null

  # Splits @tokens at the "break" / "no_break" token and builds the matching
  # rule object. Returns null when neither boundary token is present.
  do_parse: (options = {}) ->
    regex_token_lists = []
    current_regex_tokens = []
    boundary_symbol = null

    while @current_token()?
      switch @current_token().type
        when "break", "no_break"
          # A boundary token closes the token list collected so far.
          boundary_symbol = @current_token().type
          regex_token_lists.push(current_regex_tokens)
          current_regex_tokens = []
        else
          current_regex_tokens.push(@current_token())

      @next_token(@current_token().type)

    # Tokens collected after the last boundary token (or all of them when
    # there was none).
    regex_token_lists.push(current_regex_tokens)

    result = null

    switch boundary_symbol
      when "break"
        # The two sides are parsed as separate regexes.
        result = new TwitterCldr.SegmentationParser.BreakRule(
          @parse_regex(@add_anchors(regex_token_lists[0]), options),
          @parse_regex(@add_anchors(regex_token_lists[1]), options)
        )
      when "no_break"
        # Both sides are joined into one regex.
        result = new TwitterCldr.SegmentationParser.NoBreakRule(
          @parse_regex(
            @add_anchors(
              [].concat(regex_token_lists...)
            ), options
          )
        )

    result

  # Returns a new list consisting of the "^" token followed by token_list.
  add_anchors : (token_list) ->
    [@begin_token].concat(token_list)

  # Returns a TwitterCldr.UnicodeRegex built from the parsed tokens, or null
  # when `tokens` is missing or empty.
  parse_regex : (tokens, options = {}) ->
    if tokens? and tokens.length != 0 then new TwitterCldr.UnicodeRegex(@regex_parser.parse(tokens, options)) else null
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# Minimal wrapper around a plain object that maps symbol names to values.
# Kept as its own class so custom behavior can be added later without
# touching callers.
class TwitterCldr.SymbolTable

  # symbols - object used as backing store; a fresh empty object by default.
  constructor : (@symbols = {}) ->

  # Returns the value stored under `symbol` (undefined when nothing is stored).
  fetch : (symbol) -> @symbols[symbol]

  # Stores `value` under `symbol` and returns `value`.
  add : (symbol, value) -> @symbols[symbol] = value
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# A bracketed character class. @root is either an operator node
# (UnaryOperator / BinaryOperator) or an operand that responds to to_set();
# evaluate() folds that tree into a single set.
class TwitterCldr.CharacterClass extends TwitterCldr.Component

  # Maps each closing token type to the opening token type it pairs with.
  @grouping_pairs = { "close_bracket" : "open_bracket" }

  # Token types that open a group (the values of grouping_pairs).
  @opening_types : ->
    (opener for closer, opener of @grouping_pairs)

  # Token types that close a group (the keys of grouping_pairs).
  @closing_types : ->
    (closer for closer of @grouping_pairs)

  # Opening token type paired with the closing type `type`, or null.
  @opening_type_for : (type) ->
    @grouping_pairs[type] ? null

  # Node with exactly one operand, e.g. "negate".
  class @.UnaryOperator
    constructor : (@operator, @child) ->

  # Node with a left and a right operand.
  class @.BinaryOperator
    constructor : (@operator, @left, @right) ->

  # root - top node of the expression tree (may be null/undefined).
  constructor : (@root) ->
    @type = "character_class"
    @grouping_pairs = TwitterCldr.CharacterClass.grouping_pairs
    super

  # Regex source text for the evaluated set.
  to_regexp_str : ->
    @set_to_regex(@to_set())

  # Set produced by evaluating the whole tree.
  to_set : ->
    @evaluate(@root)

  # Recursively evaluates `node`. Operands are asked for their own set, a
  # missing node gives an empty RangeSet, and operator nodes combine the sets
  # of their children. An operator name not listed below yields undefined.
  evaluate : (node) ->
    operators = TwitterCldr.CharacterClass
    is_operator = node instanceof operators.UnaryOperator or node instanceof operators.BinaryOperator

    unless is_operator
      return if node? then node.to_set() else new TwitterCldr.RangeSet([])

    switch node.operator
      when "negate"
        # Everything usable in a regexp minus the operand.
        TwitterCldr.UnicodeRegex.get_valid_regexp_chars().subtract(@evaluate(node.child))
      when "union", "pipe"
        @evaluate(node.left).union(@evaluate(node.right))
      when "dash"
        @evaluate(node.left).difference(@evaluate(node.right))
      when "ampersand"
        @evaluate(node.left).intersection(@evaluate(node.right))
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# A range inside a character class, described by its two end components.
class TwitterCldr.CharacterRange extends TwitterCldr.Component

  # initial, final - components for the two ends; each must respond to to_set().
  constructor : (@initial, @final) ->
    # Character ranges and set operations share one syntax (both use "-" and
    # square brackets), so a CharacterRange has to act as a token as well as
    # an operand. The `type` property is what lets it pass for a token.
    @type = "character_range"
    super

  # RangeSet holding a single Range that runs from the first element of the
  # initial component's set to the first element of the final component's set.
  to_set : ->
    lower = @initial.to_set().to_full_array()[0]
    upper = @final.to_set().to_full_array()[0]
    span  = new TwitterCldr.Range(lower, upper)
    new TwitterCldr.RangeSet([span])
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# A named set of characters, written either as "property=value" or as a bare
# property value.
class TwitterCldr.CharacterSet extends TwitterCldr.Component
  # text - "property=value" (the property name is lower-cased) or any other
  #        string, which is taken as a property value with no property name.
  constructor : (text) ->
    name_parts = text.split("=")
    if name_parts.length == 2
      @property = name_parts[0].toLowerCase()
      @property_value = name_parts[1]
    else
      @property_value = text
      @property = null

  # Regex source text for this set.
  to_regexp_str : ->
    @set_to_regex(@to_set())

  # The set's code points with TwitterCldr.UnicodeRegex's unsupported
  # characters and invalid regexp characters removed.
  to_set : ->
    @codepoints().subtract(
      TwitterCldr.UnicodeRegex.get_unsupported_chars()
    ).subtract(
      TwitterCldr.UnicodeRegex.get_invalid_regexp_chars()
    )

  # Looks the code points up through TwitterCldr.CodePoint and returns them
  # as a RangeSet. Throws a string when a named property yields no ranges.
  # (An unused local that built a "code_points_for_<property>" name was
  # dropped; it was never read.)
  codepoints : ->
    if @property?
      ranges = TwitterCldr.CodePoint.code_points_for_property(@property, @property_value)

      if ranges?
        new TwitterCldr.RangeSet(ranges)
      else
        throw "Couldn't find property " + @property + " containing property value " + @property_value
    else
      new TwitterCldr.RangeSet(
        TwitterCldr.CodePoint.code_points_for_property_value(@property_value)
      )
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# Shared helpers that turn code points, ranges and sets into regex source
# text made of "\uXXXX" escapes.
class TwitterCldr.Component

  # Accepts one code point or an array of them and returns an ARRAY of
  # escape strings, one per code point. Callers that concatenate the result
  # with a string rely on the array's default string conversion.
  to_utf8 : (codepoints) ->
    list = if codepoints instanceof Array then codepoints else [codepoints]
    (@to_hex(cp) for cp in list)

  # Escape text for one code point. Values up to 0xFFFF outside the surrogate
  # block give a single escape; values from 0x10000 to 0x10FFFF are split
  # into a surrogate pair. Anything else (surrogates, negatives, values above
  # 0x10FFFF) yields undefined.
  to_hex : (codepoint) ->
    if 0 <= codepoint <= 0xD7FF or 0xE000 <= codepoint <= 0xFFFF
      return @to_escaped_hex(codepoint)

    if 0x10000 <= codepoint <= 0x10FFFF
      offset = codepoint - 0x10000
      high = ((0xffc00 & offset) >> 10) + 0xD800
      low = (0x3ff & offset) + 0xDC00
      # NOTE(review): the two halves are joined with a literal '+', which a
      # regex engine would read as a quantifier on the first half - confirm
      # this is intended.
      return @to_escaped_hex(high) + '+' + @to_escaped_hex(low)

  # "\u" followed by the hexadecimal form of `codepoint`, left-padded with
  # zeros to four digits.
  to_escaped_hex : (codepoint) ->
    digits = codepoint.toString(16)
    padding = "0000".slice(0, 4 - digits.length)
    "\\u" + padding + digits

  # A range whose `first` is an array is handed to array_to_regex; any other
  # range becomes a bracket expression "[first-last]".
  range_to_regex : (range) ->
    return @array_to_regex(range) if range.first instanceof Array
    "[#{@to_utf8(range.first)}-#{@to_utf8(range.last)}]"

  # Wraps every element in its own non-capturing group and concatenates them.
  array_to_regex : (arr) ->
    ("(?:#{@to_utf8(item)})" for item in arr).join("")

  # One non-capturing group listing the set's de-duplicated elements as
  # alternatives separated by "|".
  set_to_regex : (set) ->
    elements = TwitterCldr.Utilities.remove_duplicates(set.to_array(true))
    alternatives = (@_set_element_to_regex(item) for item in elements)
    "(?:" + alternatives.join("|") + ")"

  # Picks the conversion that matches the element's kind.
  _set_element_to_regex : (element) ->
    switch
      when element instanceof TwitterCldr.Range then @range_to_regex(element)
      when element instanceof Array then @array_to_regex(element)
      else @to_utf8(element)
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# Copyright 2012 Twitter, Inc
|
2
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
# A literal piece of a regex: a plain character or a backslash escape such
# as "\s" or "\D".
class TwitterCldr.Literal extends TwitterCldr.Component

  # text - the literal exactly as written, including a leading backslash if
  #        there is one.
  constructor : (@text) ->
    # Code points covered by each lower-case escape letter.
    @special_characters =
      s : [32]      # space
      t : [9]       # tab
      r : [13]      # carriage return
      n : [10]      # newline
      f : [12]      # form feed
      d : [48..57]  # digits 0-9
      # lowercase letters, uppercase letters, digits and underscore
      w : [97..122].concat([65..90], [48..57], [95])

    super

  # Character code of the first character of `char`.
  ordinalize : (char) ->
    TwitterCldr.Utilities.char_code_at(char, 0)

  # The literal is already valid regex source.
  to_regexp_str : ->
    @text

  # Set denoted by the literal. Known escapes expand to their table entry
  # (see set_for_special_char); any other escape stands for the character
  # after the backslash; unescaped text stands for its first character.
  to_set : ->
    unless @text.match(/^\\/)
      return TwitterCldr.RangeSet.from_array([@ordinalize(@text)])

    escaped = @text.slice(1)

    if @special_characters[escaped.toLowerCase()]?
      @set_for_special_char(escaped)
    else
      TwitterCldr.RangeSet.from_array([@ordinalize(escaped)])

  # Set for an escape letter. When `char` equals its own upper-case form the
  # result is the complement within the valid regexp characters; otherwise
  # it is the table entry itself.
  set_for_special_char : (char) ->
    members = TwitterCldr.RangeSet.from_array(@special_characters[char.toLowerCase()])
    return members unless char.toUpperCase() == char
    TwitterCldr.UnicodeRegex.get_valid_regexp_chars().subtract(members)
|