twitter_cldr_js 2.3.2 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (153) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -5
  3. data/History.txt +8 -0
  4. data/README.md +69 -1
  5. data/Rakefile +0 -9
  6. data/lib/assets/javascripts/twitter_cldr/af.js +2042 -142
  7. data/lib/assets/javascripts/twitter_cldr/ar.js +2043 -143
  8. data/lib/assets/javascripts/twitter_cldr/be.js +2044 -144
  9. data/lib/assets/javascripts/twitter_cldr/bg.js +2042 -142
  10. data/lib/assets/javascripts/twitter_cldr/bn.js +2042 -142
  11. data/lib/assets/javascripts/twitter_cldr/ca.js +2042 -142
  12. data/lib/assets/javascripts/twitter_cldr/cs.js +2043 -143
  13. data/lib/assets/javascripts/twitter_cldr/cy.js +2043 -143
  14. data/lib/assets/javascripts/twitter_cldr/da.js +2042 -142
  15. data/lib/assets/javascripts/twitter_cldr/de-CH.js +2042 -142
  16. data/lib/assets/javascripts/twitter_cldr/de.js +2042 -142
  17. data/lib/assets/javascripts/twitter_cldr/el.js +2042 -142
  18. data/lib/assets/javascripts/twitter_cldr/en-150.js +2042 -142
  19. data/lib/assets/javascripts/twitter_cldr/en-AU.js +2042 -142
  20. data/lib/assets/javascripts/twitter_cldr/en-CA.js +2042 -142
  21. data/lib/assets/javascripts/twitter_cldr/en-GB.js +2042 -142
  22. data/lib/assets/javascripts/twitter_cldr/en-IE.js +2042 -142
  23. data/lib/assets/javascripts/twitter_cldr/en-SG.js +2042 -142
  24. data/lib/assets/javascripts/twitter_cldr/en-ZA.js +2042 -142
  25. data/lib/assets/javascripts/twitter_cldr/en.js +2042 -142
  26. data/lib/assets/javascripts/twitter_cldr/es-419.js +2042 -142
  27. data/lib/assets/javascripts/twitter_cldr/es-CO.js +2042 -142
  28. data/lib/assets/javascripts/twitter_cldr/es-MX.js +2044 -144
  29. data/lib/assets/javascripts/twitter_cldr/es-US.js +2043 -143
  30. data/lib/assets/javascripts/twitter_cldr/es.js +2043 -143
  31. data/lib/assets/javascripts/twitter_cldr/eu.js +2042 -142
  32. data/lib/assets/javascripts/twitter_cldr/fa.js +2042 -142
  33. data/lib/assets/javascripts/twitter_cldr/fi.js +2042 -142
  34. data/lib/assets/javascripts/twitter_cldr/fil.js +2043 -143
  35. data/lib/assets/javascripts/twitter_cldr/fr-BE.js +2042 -142
  36. data/lib/assets/javascripts/twitter_cldr/fr-CA.js +2042 -142
  37. data/lib/assets/javascripts/twitter_cldr/fr-CH.js +2043 -143
  38. data/lib/assets/javascripts/twitter_cldr/fr.js +2042 -142
  39. data/lib/assets/javascripts/twitter_cldr/ga.js +2044 -144
  40. data/lib/assets/javascripts/twitter_cldr/gl.js +2042 -142
  41. data/lib/assets/javascripts/twitter_cldr/he.js +2043 -143
  42. data/lib/assets/javascripts/twitter_cldr/hi.js +2042 -142
  43. data/lib/assets/javascripts/twitter_cldr/hr.js +2043 -143
  44. data/lib/assets/javascripts/twitter_cldr/hu.js +2042 -142
  45. data/lib/assets/javascripts/twitter_cldr/id.js +2042 -142
  46. data/lib/assets/javascripts/twitter_cldr/is.js +2042 -142
  47. data/lib/assets/javascripts/twitter_cldr/it-CH.js +2043 -143
  48. data/lib/assets/javascripts/twitter_cldr/it.js +2043 -143
  49. data/lib/assets/javascripts/twitter_cldr/ja.js +2042 -142
  50. data/lib/assets/javascripts/twitter_cldr/ko.js +2042 -142
  51. data/lib/assets/javascripts/twitter_cldr/lv.js +2043 -143
  52. data/lib/assets/javascripts/twitter_cldr/msa.js +2043 -143
  53. data/lib/assets/javascripts/twitter_cldr/nl.js +2042 -142
  54. data/lib/assets/javascripts/twitter_cldr/no.js +2042 -142
  55. data/lib/assets/javascripts/twitter_cldr/pl.js +2043 -143
  56. data/lib/assets/javascripts/twitter_cldr/pt.js +2042 -142
  57. data/lib/assets/javascripts/twitter_cldr/ro.js +2043 -143
  58. data/lib/assets/javascripts/twitter_cldr/ru.js +2043 -143
  59. data/lib/assets/javascripts/twitter_cldr/sk.js +2043 -143
  60. data/lib/assets/javascripts/twitter_cldr/sq.js +2042 -142
  61. data/lib/assets/javascripts/twitter_cldr/sr.js +2043 -143
  62. data/lib/assets/javascripts/twitter_cldr/sv.js +2042 -142
  63. data/lib/assets/javascripts/twitter_cldr/ta.js +2042 -142
  64. data/lib/assets/javascripts/twitter_cldr/th.js +2042 -142
  65. data/lib/assets/javascripts/twitter_cldr/tr.js +2042 -142
  66. data/lib/assets/javascripts/twitter_cldr/uk.js +2044 -144
  67. data/lib/assets/javascripts/twitter_cldr/ur.js +2042 -142
  68. data/lib/assets/javascripts/twitter_cldr/vi.js +2042 -142
  69. data/lib/assets/javascripts/twitter_cldr/zh-cn.js +2042 -142
  70. data/lib/assets/javascripts/twitter_cldr/zh-tw.js +2042 -142
  71. data/lib/twitter_cldr/js/compiler.rb +26 -5
  72. data/lib/twitter_cldr/js/mustache/calendars/datetime.coffee +1 -4
  73. data/lib/twitter_cldr/js/mustache/numbers/numbers.coffee +10 -4
  74. data/lib/twitter_cldr/js/mustache/parsers/parser.coffee +32 -0
  75. data/lib/twitter_cldr/js/mustache/parsers/segmentation_parser.coffee +89 -0
  76. data/lib/twitter_cldr/js/mustache/parsers/symbol_table.coffee +14 -0
  77. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_class.coffee +51 -0
  78. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_range.coffee +19 -0
  79. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/character_set.coffee +36 -0
  80. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/component.coffee +48 -0
  81. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/literal.coffee +44 -0
  82. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex/unicode_string.coffee +23 -0
  83. data/lib/twitter_cldr/js/mustache/parsers/unicode_regex_parser.coffee +189 -0
  84. data/lib/twitter_cldr/js/mustache/plurals/rules.coffee +7 -5
  85. data/lib/twitter_cldr/js/mustache/shared/break_iterator.coffee +148 -0
  86. data/lib/twitter_cldr/js/mustache/shared/code_point.coffee +121 -0
  87. data/lib/twitter_cldr/js/mustache/shared/unicode_regex.coffee +41 -0
  88. data/lib/twitter_cldr/js/mustache/tokenizers/composite_token.coffee +11 -0
  89. data/lib/twitter_cldr/js/mustache/tokenizers/segmentation_tokenizer.coffee +24 -0
  90. data/lib/twitter_cldr/js/mustache/tokenizers/token.coffee +14 -0
  91. data/lib/twitter_cldr/js/mustache/tokenizers/tokenizer.coffee +83 -0
  92. data/lib/twitter_cldr/js/mustache/tokenizers/unicode_regex/unicode_regex_tokenizer.coffee +39 -0
  93. data/lib/twitter_cldr/js/mustache/utilities.coffee +45 -0
  94. data/lib/twitter_cldr/js/mustache/utils/code_points.coffee +23 -0
  95. data/lib/twitter_cldr/js/mustache/utils/range.coffee +16 -0
  96. data/lib/twitter_cldr/js/mustache/utils/range_set.coffee +195 -0
  97. data/lib/twitter_cldr/js/renderers.rb +39 -10
  98. data/lib/twitter_cldr/js/renderers/calendars/timespan_renderer.rb +1 -1
  99. data/lib/twitter_cldr/js/renderers/numbers/numbers_renderer.rb +16 -9
  100. data/lib/twitter_cldr/js/renderers/parsers/parser.rb +18 -0
  101. data/lib/twitter_cldr/js/renderers/parsers/segmentation_parser.rb +18 -0
  102. data/lib/twitter_cldr/js/renderers/parsers/symbol_table.rb +18 -0
  103. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_class.rb +18 -0
  104. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_range.rb +18 -0
  105. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/character_set.rb +18 -0
  106. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/component.rb +18 -0
  107. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/literal.rb +18 -0
  108. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex/unicode_string.rb +18 -0
  109. data/lib/twitter_cldr/js/renderers/parsers/unicode_regex_parser.rb +18 -0
  110. data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_renderer.rb +27 -28
  111. data/lib/twitter_cldr/js/renderers/shared/break_iterator_renderer.rb +50 -0
  112. data/lib/twitter_cldr/js/renderers/shared/code_point_renderer.rb +103 -0
  113. data/lib/twitter_cldr/js/renderers/shared/unicode_regex_renderer.rb +18 -0
  114. data/lib/twitter_cldr/js/renderers/tokenizers/composite_token.rb +18 -0
  115. data/lib/twitter_cldr/js/renderers/tokenizers/segmentation_tokenizer.rb +18 -0
  116. data/lib/twitter_cldr/js/renderers/tokenizers/token.rb +18 -0
  117. data/lib/twitter_cldr/js/renderers/tokenizers/tokenizer.rb +18 -0
  118. data/lib/twitter_cldr/js/renderers/tokenizers/unicode_regex/unicode_regex_tokenizer.rb +18 -0
  119. data/lib/twitter_cldr/js/renderers/utils/code_points.rb +18 -0
  120. data/lib/twitter_cldr/js/renderers/utils/range.rb +18 -0
  121. data/lib/twitter_cldr/js/renderers/utils/range_set.rb +18 -0
  122. data/lib/twitter_cldr/js/tasks/tasks.rb +1 -1
  123. data/lib/twitter_cldr/js/version.rb +1 -1
  124. data/spec/js/calendars/datetime.ru.spec.js +17 -0
  125. data/spec/js/calendars/timespan.ru.spec.js +20 -0
  126. data/spec/js/numbers/abbreviated/abbreviated_number.spec.js +5 -5
  127. data/spec/js/numbers/abbreviated/long_decimal.ru.spec.js +24 -0
  128. data/spec/js/numbers/currency.spec.js +1 -1
  129. data/spec/js/parsers/parser.spec.js +74 -0
  130. data/spec/js/parsers/segmentation_parser.spec.js +67 -0
  131. data/spec/js/parsers/symbol_table.spec.js +20 -0
  132. data/spec/js/parsers/unicode_regex/character_class.spec.js +121 -0
  133. data/spec/js/parsers/unicode_regex/character_range.spec.js +17 -0
  134. data/spec/js/parsers/unicode_regex/character_set.spec.js +17 -0
  135. data/spec/js/parsers/unicode_regex/literal.spec.js +30 -0
  136. data/spec/js/parsers/unicode_regex/unicode_string.spec.js +17 -0
  137. data/spec/js/parsers/unicode_regex_parser.spec.js +76 -0
  138. data/spec/js/plurals/plural_rules.spec.js +21 -0
  139. data/spec/js/shared/break_iterator.spec.js +68 -0
  140. data/spec/js/shared/code_point.spec.js +89 -0
  141. data/spec/js/shared/unicode_regex.spec.js +201 -0
  142. data/spec/js/tokenizers/composite_token.spec.js +28 -0
  143. data/spec/js/tokenizers/segmentation_tokenizer.spec.js +22 -0
  144. data/spec/js/tokenizers/token.spec.js +25 -0
  145. data/spec/js/tokenizers/unicode_regex/unicode_regex_tokenizer.spec.js +163 -0
  146. data/spec/js/utilities.spec.js +47 -0
  147. data/spec/js/utils/code_points.spec.js +49 -0
  148. data/spec/js/utils/range_set.spec.js +248 -0
  149. data/twitter_cldr_js.gemspec +8 -6
  150. metadata +128 -34
  151. data/lib/twitter_cldr/js/renderers/plurals/rules/plural_rules_compiler.rb +0 -93
  152. data/spec/ruby/renderers/plurals/plural_rules_compiler_spec.rb +0 -56
  153. data/spec/ruby/spec_helper.rb +0 -11
@@ -0,0 +1,23 @@
1
+ # Copyright 2012 Twitter, Inc
2
+ # http://www.apache.org/licenses/LICENSE-2.0
3
+
4
# Parser component holding the code point(s) of a string found in a unicode
# regular expression.
class TwitterCldr.UnicodeString extends TwitterCldr.Component
  # @codepoints - the code points that make up the string.
  constructor : (@codepoints) ->
    super

  # Wraps the code points in a single-range RangeSet.
  #
  # If the number of codepoints is greater than 1, they are treated as a
  # group (eg. multichar string) and the whole list is used as both ends of
  # the range. This is definitely a hack in that it means there has to be
  # special logic in RangeSet that deals with data types that aren't true
  # integer ranges, but there is no other obvious way to support multichar
  # strings.
  to_set : ->
    endpoint = if @codepoints.length > 1 then @codepoints else @codepoints[0]
    new TwitterCldr.RangeSet([new TwitterCldr.Range(endpoint, endpoint)])

  # Renders the code points through array_to_regex (not defined in this
  # class), wrapping a non-array value in a one-element list first.
  to_regexp_str : ->
    if @codepoints instanceof Array
      @array_to_regex(@codepoints)
    else
      @array_to_regex([@codepoints])
23
+
@@ -0,0 +1,189 @@
1
+ # Copyright 2012 Twitter, Inc
2
+ # http://www.apache.org/licenses/LICENSE-2.0
3
+
4
# Parses the tokens of a unicode regular expression into parser components
# (character classes, character sets, ranges, strings and literals).
class TwitterCldr.UnicodeRegexParser extends TwitterCldr.Parser

  constructor : ->
    # Types that are allowed to be used in character ranges.
    @character_class_token_types = [
      "variable", "character_set", "negated_character_set", "unicode_char",
      "multichar_string", "string", "escaped_character", "character_range"
    ]

    @negated_token_types = ["negated_character_set"]
    @binary_operators = ["pipe", "ampersand", "dash", "union"]
    @unary_operators = ["negate"]

  # Substitutes variables (when options.symbol_table is given), preprocesses
  # the resulting tokens and hands them to the parent parser.
  parse : (tokens, options = {}) ->
    substituted = @substitute_variables(tokens, options.symbol_table)
    super(@preprocess(substituted), options)

  # Builds a token of the given type; value may be omitted.
  make_token : (type, value) ->
    new TwitterCldr.Token({"type": type, "value" : value})

  # Identifies regex ranges and makes implicit operators explicit.
  # Returns a new token list; the input list is not modified.
  preprocess : (tokens) ->
    processed = []
    index = 0

    while index < tokens.length
      current = tokens[index]
      previous = processed[processed.length - 1]

      # Character class entities side-by-side are treated as unions. So
      # are side-by-side character classes. Add a special placeholder token
      # to help out the expression parser.
      after_operand = @is_valid_character_class_token(previous) and current.type != "close_bracket"
      between_classes = previous? and previous.type == "close_bracket" and current.type == "open_bracket"
      processed.push(@make_token("union")) if after_operand or between_classes

      # operand, dash, operand  =>  a single character range
      range_end = tokens[index + 2]
      is_range = @is_valid_character_class_token(current) and
        @is_valid_character_class_token(range_end) and
        tokens[index + 1].type == "dash"

      if is_range
        initial = @[current.type](current)
        final = @[range_end.type](range_end)
        processed.push(@make_character_range(initial, final))
        index += 3
        continue

      if @is_negated_token(current)
        # wrap the negated set as an explicit "[", negate, set, "]" group
        processed.push(
          @make_token("open_bracket"),
          @make_token("negate"),
          current,
          @make_token("close_bracket")
        )
      else
        processed.push(current)

      index += 1

    processed

  # Replaces every variable token that the symbol table can resolve with the
  # tokens it stands for. Returns the tokens untouched when no table is given.
  substitute_variables : (tokens, symbol_table) ->
    return tokens unless symbol_table?

    expanded = []
    for token in tokens
      replacement = if token.type == "variable" then symbol_table.fetch(token.value) else null
      if replacement?
        # variables can themselves contain references to other variables
        # note: this could be cached somehow
        expanded = expanded.concat(@substitute_variables(replacement, symbol_table))
      else
        expanded.push token

    expanded

  make_character_range : (initial, final) ->
    new TwitterCldr.CharacterRange(initial, final)

  is_negated_token : (token) ->
    token? and @negated_token_types.indexOf(token.type) >= 0

  is_valid_character_class_token : (token) ->
    token? and @character_class_token_types.indexOf(token.type) >= 0

  is_unary_operator : (token) ->
    token? and @unary_operators.indexOf(token.type) >= 0

  is_binary_operator : (token) ->
    token? and @binary_operators.indexOf(token.type) >= 0

  # Walks the token stream and returns the list of parsed elements. Tokens
  # other than "open_bracket" and "union" are dispatched to the method named
  # after their type.
  do_parse : (options) ->
    parsed = []
    while @current_token()
      type = @current_token().type
      if type is "open_bracket"
        parsed.push(@character_class())
      else if type is "union"
        @next_token("union")
      else
        parsed.push(@[type](@current_token()))
        @next_token(@current_token().type)
    parsed

  character_set : (token) ->
    name = token.value.replace(/^\\p/g, "").replace(/[\{\}\[\]:]/g, "")
    new TwitterCldr.CharacterSet(name)

  negated_character_set : (token) ->
    name = token.value.replace(/^\\[pP]/g, "").replace(/[\{\}\[\]:^]/g, "")
    new TwitterCldr.CharacterSet(name)

  unicode_char : (token) ->
    hex = token.value.replace(/^\\u/g, "").replace(/[\{\}]/g, "")
    new TwitterCldr.UnicodeString([parseInt(hex, 16)])

  string : (token) ->
    code_points = TwitterCldr.Utilities.unpack_string(token.value)
    new TwitterCldr.UnicodeString(code_points)

  multichar_string : (token) ->
    unwrapped = token.value.replace(/[\{\}]/g, "")
    new TwitterCldr.UnicodeString(TwitterCldr.Utilities.unpack_string(unwrapped))

  escaped_character : (token) ->
    new TwitterCldr.Literal(token.value)

  special_char : (token) ->
    new TwitterCldr.Literal(token.value)

  negate : (token) ->
    @special_char(token)

  pipe : (token) ->
    @special_char(token)

  ampersand : (token) ->
    @special_char(token)

  # current_token is already a CharacterRange object
  character_range : (token) ->
    token

  # Parses one (possibly nested) bracketed character class using an operator
  # stack and an operand stack, starting at the current token. Consumes
  # tokens until every opened bracket is closed and no operator is pending.
  character_class : ->
    operators = []
    operands = []
    depth = 0

    loop
      token = @current_token()

      if token.type in TwitterCldr.CharacterClass.closing_types()
        pending = @peek(operators)
        depth -= 1

        # fold everything stacked since the matching opening token
        until pending.type is TwitterCldr.CharacterClass.opening_type_for(@current_token().type)
          operator = operators.pop()
          if @is_unary_operator(operator)
            node = @unary_operator_node(operator.type, operands.pop())
          else
            # the most recently pushed operand is the right-hand side
            right = operands.pop()
            left = operands.pop()
            node = @binary_operator_node(operator.type, right, left)

          operands.push(node)
          pending = @peek(operators)

        # discard the opening token itself
        operators.pop()

      else if token.type in TwitterCldr.CharacterClass.opening_types()
        depth += 1
        operators.push(token)

      else if @is_unary_operator(token) or @is_binary_operator(token)
        operators.push(token)

      else
        operands.push(@[token.type](token))

      @next_token(@current_token().type)

      break if operators.length is 0 and depth is 0

    new TwitterCldr.CharacterClass(operands.pop())

  # Returns the last element of the array without removing it.
  peek : (array) ->
    array[array.length-1]

  # Note the parameter order: the right operand comes first because it is
  # popped off the operand stack first.
  binary_operator_node : (operator, right, left) ->
    new TwitterCldr.CharacterClass.BinaryOperator(operator, left, right)

  unary_operator_node : (operator, child) ->
    new TwitterCldr.CharacterClass.UnaryOperator(operator, child)
@@ -3,12 +3,14 @@
3
3
 
4
4
  class TwitterCldr.PluralRules
5
5
  @rules = `{{{rules}}}`
6
+ @runtime = `{{{runtime}}}`
7
+ @names = {{{names}}}
6
8
 
7
- @all: ->
8
- return @rules.keys
9
+ @all: (type = 'cardinal') ->
10
+ return @names[type]
9
11
 
10
- @rule_for: (number) ->
12
+ @rule_for: (number, type = 'cardinal') ->
11
13
  try
12
- return @rules.rule(number)
14
+ return @rules[type](number.toString(), @runtime)
13
15
  catch error
14
- return "other"
16
+ return "other"
@@ -0,0 +1,148 @@
1
+ # Copyright 2012 Twitter, Inc
2
+ # http://www.apache.org/licenses/LICENSE-2.0
3
+
4
# Splits text into segments using segmentation rules compiled from the
# embedded root resource, per-locale tailorings and (optionally) ULI
# exceptions. Only sentence segmentation is implemented.
class TwitterCldr.BreakIterator
  # locale  - locale used to look up tailorings and exceptions
  #           (defaults to TwitterCldr.locale).
  # options - "use_uli_exceptions": set to false to skip the exception rule
  #           (defaults to true).
  constructor : (locale = TwitterCldr.locale, options = {}) ->
    @locale = locale
    @use_uli_exceptions = (if options["use_uli_exceptions"]? then options["use_uli_exceptions"] else true)
    @exceptions_cache = {}
    @segmentation_tokenizer = new TwitterCldr.SegmentationTokenizer()
    @segmentation_parser = new TwitterCldr.SegmentationParser()
    @tailoring_resource_data = `{{{tailoring_resource_data}}}`
    @exceptions_resource_data = `{{{exceptions_resource_data}}}`
    @root_resource = `{{{root_resource_data}}}`

  # Returns the sentences of str as an array; block, when given, is called
  # with each sentence.
  each_sentence : (str, block) ->
    @each_boundary(str, "sentence", block)

  each_word : (str, block) ->
    throw "Word segmentation is not currently supported."

  each_line : (str, block) ->
    throw "Line segmentation is not currently supported."

  # Maps a boundary type to its resource key, e.g. "sentence" becomes
  # "SentenceBreak". Only the first regex match is upper-cased.
  boundary_name_for: (str) ->
    str.replace(/(?:^|\_)([A-Za-z])/, (match) ->
      match.toUpperCase()
    ) + "Break"

  # Scans str with the compiled rules and returns the list of segments.
  # block, when given, is called once per segment in order.
  each_boundary : (str, boundary_type, block) ->
    rules = @compile_rules_for(@locale, boundary_type)
    match = null
    last_offset = 0
    current_position = 0
    search_str = str
    result = []

    while search_str.length isnt 0
      # the first rule that matches at the current position wins
      rule = null
      for r in rules
        match = r.match(search_str)
        if match?
          rule = r
          break

      # Stop scanning when no rule applies or the match would not advance;
      # otherwise this would dereference null or never terminate. Whatever
      # is left over is emitted as the final segment below.
      break unless rule? and match.boundary_offset > 0

      if rule.boundary_symbol is "break"
        break_offset = current_position + match.boundary_offset
        result.push(str.slice(last_offset, break_offset))
        if block?
          block(result[result.length-1])

        last_offset = break_offset

      search_str = search_str.slice(match.boundary_offset)
      current_position += match.boundary_offset

    # Emit the text after the last break. The comparison is against
    # str.length (not length - 1) so that a trailing segment that is a single
    # character long is not dropped.
    if last_offset < str.length
      result.push(str.slice(last_offset))
      if block?
        block(str.slice(last_offset))

    result

  # Builds (and caches per locale and boundary type) the no-break rule made
  # of the locale's exceptions. Returns undefined for boundary types other
  # than "sentence".
  compile_exception_rule_for : (locale, boundary_type, boundary_name) ->
    return unless boundary_type is "sentence"

    cache_key = TwitterCldr.Utilities.compute_cache_key([locale, boundary_type])

    # only build the regex when the rule is not cached yet
    unless @exceptions_cache[cache_key]
      exceptions = @exceptions_for(locale, boundary_name)
      regex_contents = (TwitterCldr.Utilities.regex_escape(exception) for exception in exceptions).join("|")
      @exceptions_cache[cache_key] = @segmentation_parser.parse(
        @segmentation_tokenizer.tokenize("(?:"+regex_contents+") \u00D7")
      )

    @exceptions_cache[cache_key]

  # Grabs rules from segment_root, applies custom tailorings (our own, NOT from CLDR),
  # and optionally integrates ULI exceptions.
  compile_rules_for : (locale, boundary_type) ->
    boundary_name = @boundary_name_for(boundary_type)
    boundary_data = @resource_for(boundary_name)
    symbol_table = @symbol_table_for(boundary_data)
    root_rules = @rules_for(boundary_data, symbol_table)
    tailoring_boundary_data = @tailoring_resource_for(locale, boundary_name)
    tailoring_rules = @rules_for(tailoring_boundary_data, symbol_table)
    rules = @merge_rules(root_rules, tailoring_rules)

    if @use_uli_exceptions is true
      exception_rule = @compile_exception_rule_for(locale, boundary_type, boundary_name)
      # there is no exception rule for non-sentence boundary types; never
      # put an undefined entry at the head of the rule list
      rules.unshift(exception_rule) if exception_rule?

    rules

  # replaces ruleset1's rules with rules with the same id from ruleset2
  merge_rules : (ruleset1, ruleset2) ->
    result = []
    TwitterCldr.Utilities.arraycopy ruleset1, 0, result, 0, ruleset1.length

    for i in [0...ruleset2.length] by 1
      for j in [0...result.length] by 1
        if ruleset2[i].id == result[j].id
          result[j] = ruleset2[i]

    result

  # Builds a SymbolTable from boundary_data.variables, resolving references
  # to previously defined variables as it goes.
  symbol_table_for : (boundary_data) ->
    table = new TwitterCldr.SymbolTable()

    for i in [0...boundary_data.variables.length] by 1
      variable = boundary_data.variables[i]
      id = variable.id.toString()
      tokens = @segmentation_tokenizer.tokenize(variable.value)
      # note: variables can be redefined (add replaces if key already exists)
      table.add(id, @resolve_symbols(tokens, table))

    table

  # Returns tokens with every variable token replaced by whatever the symbol
  # table holds for it.
  resolve_symbols : (tokens, symbol_table) ->
    result = []

    for i in [0...tokens.length]
      token = tokens[i]
      if token.type == "variable"
        result = result.concat(symbol_table.fetch(token.value))
      else
        result.push(token)

    result

  # Parses every rule in boundary_data.rules and tags each parsed rule with
  # its source string and id.
  rules_for : (boundary_data, symbol_table) ->
    results = []
    for rule in boundary_data.rules
      r = @segmentation_parser.parse(
        @segmentation_tokenizer.tokenize(rule.value), {"symbol_table" : symbol_table}
      )
      r.string = rule.value
      r.id = rule.id
      results.push(r)

    results

  resource_for : (boundary_name) ->
    @root_resource["segments"][boundary_name]

  tailoring_resource_for : (locale, boundary_name) ->
    @tailoring_resource_data[locale][locale]["segments"][boundary_name]

  # Returns the exception list for the locale, or [] when the resource has no
  # "exceptions" entry. NOTE(review): a locale missing from the resource
  # data altogether still raises a TypeError here - confirm whether that can
  # happen for supported locales.
  exceptions_for : (locale, boundary_name) ->
    result = @exceptions_resource_data[locale][locale]["exceptions"]
    if result? then result else []
@@ -0,0 +1,121 @@
1
+ # Copyright 2012 Twitter, Inc
2
+ # http://www.apache.org/licenses/LICENSE-2.0
3
+
4
# Wraps the data fields of a single code point and provides class-level
# lookups over the embedded index and property data.
class TwitterCldr.CodePoint
  # Field names, in the order they appear in the fields array handed to the
  # constructor.
  @code_point_fields = [
    "code_point"
    "name"
    "category"
    "combining_class"
    "bidi_class"
    "decomposition"
    "digit_value"
    "non_decimal_digit_value"
    "numeric_value"
    "bidi_mirrored"
    "unicode1_name"
    "iso_comment"
    "simple_uppercase_map"
    "simple_lowercase_map"
    "simple_titlecase_map"
  ]

  # position of "decomposition" in @code_point_fields
  decomposition_data_index = 5
  # optional "<tag> " prefix followed by an optional list of code points
  decomposition_regex = /^(?:<(.+)>\s+)?(.+)?$/
  @indices = ["category", "bidi_class", "bidi_mirrored"]
  @properties = ["sentence_break", "line_break", "word_break"]

  # Copies every field onto the instance under its field name, except
  # "decomposition", which is exposed through the decomposition method.
  constructor : (@fields) ->
    for i in [0...TwitterCldr.CodePoint.code_point_fields.length] by 1
      field = TwitterCldr.CodePoint.code_point_fields[i]
      unless field is "decomposition"
        @[field] = @fields[i]

  # Returns the decomposition as an array of integers parsed from hex, or
  # null when the field lists no code points. Throws a string when the field
  # does not match the expected format.
  decomposition : ->
    decomp = @fields[decomposition_data_index]
    match = decomp.match(decomposition_regex)
    if match?
      if match[2]?
        return (parseInt(s, 16) for s in match[2].match(/\S+/g))
      else
        return null
    else
      throw "decomposition " + decomp + " has invalid format"

  # Returns the text between "<" and ">" at the start of the decomposition
  # field, or null when there is none.
  compatibility_decomposition_tag : ->
    decomp = @fields[decomposition_data_index]
    if (match = decomp.match(decomposition_regex))
      if match[1]? then return match[1] else return null
    else
      throw "decomposition " + decomp + " has invalid format"

  is_compatibility_decomposition : ->
    return @compatibility_decomposition_tag()?

  # Returns the ranges recorded for the given value of the given property.
  # NOTE(review): get_property_data always returns an object (an empty one
  # for unknown property names), so the throw below is not reachable as
  # written - confirm the intended behaviour before relying on it.
  @code_points_for_property : (property_name, value) ->
    property_data = @get_property_data(property_name)
    if property_data?
      property_data[value]
    else
      throw "Couldn't find property " + property_name

  # Search for code points wherein at least one property value contains prop_value.
  # For example, if prop_value is set to "Zs", this method will return all code
  # points that are considered spaces. If prop value is simply "Z", this method
  # will return all code points who have a property value that contains "Z", i.e.
  # spaces as well as line separators ("Zl") and paragraph separators ("Zp").
  # Results are cached per prop_value.
  @code_points_for_property_value : (prop_value) ->
    if @index_key_cache[prop_value]?
      return @index_key_cache[prop_value]

    result = []
    for index_key, index_names of @index_keys
      if index_key.indexOf(prop_value) > -1
        for index_name in index_names
          result = result.concat(@get_index(index_name)[index_key])

    @index_key_cache[prop_value] = result

  @index_key_cache = {}

  @index_keys = `{{{index_keys}}}`

  @index_data = `{{{index_data}}}`

  # Returns the named index with every [first, last] pair converted to a
  # TwitterCldr.Range; the converted index is cached.
  @get_index : (index_name) ->
    return @index_cache[index_name] if @index_cache[index_name]?
    index_data = @index_data[index_name]
    index_data_formatted = {}
    for k, v of index_data
      index_data_formatted[k] = []
      for range in index_data[k]
        index_data_formatted[k].push(new TwitterCldr.Range(range[0], range[1]))

    @index_cache[index_name] = index_data_formatted

  @property_data = `{{{property_data}}}`

  # Returns the named property data with every [first, last] pair converted
  # to a TwitterCldr.Range; the converted data is cached.
  @get_property_data : (property_name) ->
    return @property_data_cache[property_name] if @property_data_cache[property_name]?
    property_data = @property_data[property_name]
    property_data_formatted = {}
    for k, v of property_data
      property_data_formatted[k] = []
      for range in property_data[k]
        property_data_formatted[k].push(new TwitterCldr.Range(range[0], range[1]))

    @property_data_cache[property_name] = property_data_formatted

  @index_cache = {}

  @property_data_cache = {}

  # Returns the name of the block containing code_point, or null when no
  # block in @blocks includes it. Results are cached per code point.
  @get_block_name : (code_point) ->
    # The cache is not initialised anywhere else in this class; create it
    # on first use so the lookup below cannot hit an undefined object.
    @block_cache ?= {}
    if @block_cache[code_point]?
      return @block_cache[code_point]
    # NOTE(review): @blocks is not defined in this class either; iterating an
    # undefined value yields nothing, so this returns null unless block data
    # is attached elsewhere - confirm.
    for k, range of @blocks
      range = new TwitterCldr.Range(range[0], range[1])
      if range.includes(code_point)
        return @block_cache[code_point] = k
    return null