htmlentities 3.0.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
File without changes
@@ -1,3 +1,18 @@
1
+ == 4.0.0 (2007-03-15)
2
+ * New instantiation-based interface (but the legacy interface is preserved for
3
+ compatibility).
4
+ * Handles HTML4 as well as XHTML1 (the former lacks the &apos; entity).
5
+ * Encodes basic entities numerically when neither :basic nor :named is specified
6
+ and :decimal or :hexadecimal is.
7
+ * Performs a maximum of two gsub passes instead of three when encoding, which
8
+ should be more efficient on long strings.
9
+
10
+ == 3.1.0 (2007-01-19)
11
+ * Now understands all the entities referred to in the XHTML 1.0 DTD (253
12
+ entities compared with 131 in version 3.0.1).
13
+ * Calls to_s on parameters to play nicely with Rails 1.2.1.
14
+ * Entity mapping data is now lazily loaded.
15
+
1
16
  == 3.0.1 (2005-04-08)
2
17
  * Improved documentation.
3
18
 
@@ -0,0 +1,44 @@
1
+ == HTMLEntities
2
+
3
+ HTML entity encoding and decoding for Ruby
4
+
5
+ The HTMLEntities class facilitates encoding and decoding of
6
+ (X)HTML entities from/to their corresponding UTF-8 encoded characters.
7
+
8
+ To install (requires root/admin privileges):
9
+
10
+ ruby setup.rb
11
+
12
+ Alternatively, you can just use the gem.
13
+
14
+ == Licence
15
+
16
+ This code is free to use under the terms of the MIT licence:
17
+
18
+ Copyright (c) 2007 Paul Battley
19
+
20
+ Permission is hereby granted, free of charge, to any person obtaining a copy
21
+ of this software and associated documentation files (the "Software"), to
22
+ deal in the Software without restriction, including without limitation the
23
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
24
+ sell copies of the Software, and to permit persons to whom the Software is
25
+ furnished to do so, subject to the following conditions:
26
+
27
+ The above copyright notice and this permission notice shall be included in
28
+ all copies or substantial portions of the Software.
29
+
30
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
31
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
32
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
33
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
34
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
35
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
36
+ IN THE SOFTWARE.
37
+
38
+ If you'd like to negotiate a different licence for a specific use, just
39
+ contact me -- I'll almost certainly permit it.
40
+
41
+ == Contact
42
+
43
+ Comments are welcome. Send an email to pbattley@gmail.com.
44
+
@@ -1,102 +1,47 @@
1
+ require 'htmlentities/legacy'
2
+
1
3
  #
2
4
  # HTML entity encoding and decoding for Ruby
3
5
  #
4
6
 
5
- module HTMLEntities
6
-
7
- class InstructionError < RuntimeError
8
- end
9
-
10
- module Data #:nodoc:
11
-
12
- #
13
- # MAP is a hash of all the HTML entities I could discover, as taken
14
- # from the w3schools page on the subject:
15
- # http://www.w3schools.com/html/html_entitiesref.asp
16
- # The format is 'entity name' => codepoint where entity name is given
17
- # without the surrounding ampersand and semicolon.
18
- #
19
- MAP = {
20
- 'quot' => 34, 'apos' => 39, 'amp' => 38,
21
- 'lt' => 60, 'gt' => 62, 'nbsp' => 160,
22
- 'iexcl' => 161, 'curren' => 164, 'cent' => 162,
23
- 'pound' => 163, 'yen' => 165, 'brvbar' => 166,
24
- 'sect' => 167, 'uml' => 168, 'copy' => 169,
25
- 'ordf' => 170, 'laquo' => 171, 'not' => 172,
26
- 'shy' => 173, 'reg' => 174, 'trade' => 8482,
27
- 'macr' => 175, 'deg' => 176, 'plusmn' => 177,
28
- 'sup2' => 178, 'sup3' => 179, 'acute' => 180,
29
- 'micro' => 181, 'para' => 182, 'middot' => 183,
30
- 'cedil' => 184, 'sup1' => 185, 'ordm' => 186,
31
- 'raquo' => 187, 'frac14' => 188, 'frac12' => 189,
32
- 'frac34' => 190, 'iquest' => 191, 'times' => 215,
33
- 'divide' => 247, 'Agrave' => 192, 'Aacute' => 193,
34
- 'Acirc' => 194, 'Atilde' => 195, 'Auml' => 196,
35
- 'Aring' => 197, 'AElig' => 198, 'Ccedil' => 199,
36
- 'Egrave' => 200, 'Eacute' => 201, 'Ecirc' => 202,
37
- 'Euml' => 203, 'Igrave' => 204, 'Iacute' => 205,
38
- 'Icirc' => 206, 'Iuml' => 207, 'ETH' => 208,
39
- 'Ntilde' => 209, 'Ograve' => 210, 'Oacute' => 211,
40
- 'Ocirc' => 212, 'Otilde' => 213, 'Ouml' => 214,
41
- 'Oslash' => 216, 'Ugrave' => 217, 'Uacute' => 218,
42
- 'Ucirc' => 219, 'Uuml' => 220, 'Yacute' => 221,
43
- 'THORN' => 222, 'szlig' => 223, 'agrave' => 224,
44
- 'aacute' => 225, 'acirc' => 226, 'atilde' => 227,
45
- 'auml' => 228, 'aring' => 229, 'aelig' => 230,
46
- 'ccedil' => 231, 'egrave' => 232, 'eacute' => 233,
47
- 'ecirc' => 234, 'euml' => 235, 'igrave' => 236,
48
- 'iacute' => 237, 'icirc' => 238, 'iuml' => 239,
49
- 'eth' => 240, 'ntilde' => 241, 'ograve' => 242,
50
- 'oacute' => 243, 'ocirc' => 244, 'otilde' => 245,
51
- 'ouml' => 246, 'oslash' => 248, 'ugrave' => 249,
52
- 'uacute' => 250, 'ucirc' => 251, 'uuml' => 252,
53
- 'yacute' => 253, 'thorn' => 254, 'yuml' => 255,
54
- 'OElig' => 338, 'oelig' => 339, 'Scaron' => 352,
55
- 'scaron' => 353, 'Yuml' => 376, 'circ' => 710,
56
- 'tilde' => 732, 'ensp' => 8194, 'emsp' => 8195,
57
- 'thinsp' => 8201, 'zwnj' => 8204, 'zwj' => 8205,
58
- 'lrm' => 8206, 'rlm' => 8207, 'ndash' => 8211,
59
- 'mdash' => 8212, 'lsquo' => 8216, 'rsquo' => 8217,
60
- 'sbquo' => 8218, 'ldquo' => 8220, 'rdquo' => 8221,
61
- 'bdquo' => 8222, 'dagger' => 8224, 'Dagger' => 8225,
62
- 'hellip' => 8230, 'permil' => 8240, 'lsaquo' => 8249,
63
- 'rsaquo' => 8250, 'euro' => 8364
64
- }
7
+ class HTMLEntities
65
8
 
66
- MIN_LENGTH = MAP.keys.map{ |a| a.length }.min
67
- MAX_LENGTH = MAP.keys.map{ |a| a.length }.max
68
- NAMED_ENTITY_REGEXP = /&([a-z]{#{MIN_LENGTH},#{MAX_LENGTH}});/i
69
- REVERSE_MAP = MAP.invert
9
+ VERSION = '4.0.0'
10
+ FLAVORS = %w[html4 xhtml1]
11
+ INSTRUCTIONS = [:basic, :named, :decimal, :hexadecimal]
70
12
 
71
- BASIC_ENTITY_REGEXP = /[<>'"&]/
72
-
73
- UTF8_NON_ASCII_REGEXP = /[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+/
13
+ class InstructionError < RuntimeError
14
+ end
15
+ class UnknownFlavor < RuntimeError
16
+ end
74
17
 
75
- ENCODE_ENTITIES_COMMAND_ORDER = {
76
- :basic => 0,
77
- :named => 1,
78
- :decimal => 2,
79
- :hexadecimal => 3
80
- }
81
-
18
+ #
19
+ # Create a new HTMLEntities coder for the specified flavor.
20
+ # Available flavors are 'html4' and 'xhtml1' (the default).
21
+ # The only difference in functionality between the two is in the handling of the apos
22
+ # (apostrophe) named entity, which is not defined in HTML4.
23
+ #
24
+ def initialize(flavor='xhtml1')
25
+ @flavor = flavor.to_s.downcase
26
+ raise UnknownFlavor, "Unknown flavor #{flavor}" unless FLAVORS.include?(@flavor)
82
27
  end
83
-
28
+
84
29
  #
85
- # Decode XML and HTML 4.01 entities in a string into their UTF-8
30
+ # Decode entities in a string into their UTF-8
86
31
  # equivalents. Obviously, if your string is not already in UTF-8, you'd
87
32
  # better convert it before using this method, or the output will be mixed
88
33
  # up.
89
34
  #
90
- # Unknown named entities are not converted
35
+ # Unknown named entities will not be converted
91
36
  #
92
- def decode_entities(string)
93
- return string.gsub(Data::NAMED_ENTITY_REGEXP) {
94
- (cp = Data::MAP[$1]) ? [cp].pack('U') : $&
95
- }.gsub(/&#([0-9]{1,7});|&#x([0-9a-f]{1,6});/i) {
96
- $1 ? [$1.to_i].pack('U') : [$2.to_i(16)].pack('U')
37
+ def decode(source)
38
+ return source.to_s.gsub(named_entity_regexp) {
39
+ (cp = map[$1]) ? [cp].pack('U') : $&
40
+ }.gsub(/&#([0-9]{1,7});|&#x([0-9a-f]{1,6});/i) {
41
+ $1 ? [$1.to_i].pack('U') : [$2.to_i(16)].pack('U')
97
42
  }
98
43
  end
99
-
44
+
100
45
  #
101
46
  # Encode codepoints into their corresponding entities. Various operations
102
47
  # are possible, and may be specified in order:
@@ -119,48 +64,102 @@ module HTMLEntities
119
64
  # non-ASCII characters replaced with their named entity where possible, and
120
65
  # decimal equivalents otherwise.
121
66
  #
122
- # Note: It is the program's responsibility to ensure that the string
67
+ # Note: It is the program's responsibility to ensure that the source
123
68
  # contains valid UTF-8 before calling this method.
124
69
  #
125
- def encode_entities(string, *instructions)
126
- output = nil
70
+ def encode(source, *instructions)
71
+ string = source.to_s.dup
127
72
  if (instructions.empty?)
128
- instructions = [:basic]
129
- else
130
- instructions = instructions.sort_by { |instruction|
131
- Data::ENCODE_ENTITIES_COMMAND_ORDER[instruction] ||
132
- (raise InstructionError, "unknown encode_entities command `#{instruction.inspect}'")
133
- }
73
+ instructions = [:basic]
74
+ elsif (unknown_instructions = instructions - INSTRUCTIONS) != []
75
+ raise InstructionError,
76
+ "unknown encode_entities command(s): #{unknown_instructions.inspect}"
77
+ end
78
+
79
+ basic_entity_encoder =
80
+ if instructions.include?(:basic) || instructions.include?(:named)
81
+ :encode_named
82
+ elsif instructions.include?(:decimal)
83
+ :encode_decimal
84
+ else instructions.include?(:hexadecimal)
85
+ :encode_hexadecimal
134
86
  end
135
- instructions.each do |instruction|
136
- case instruction
137
- when :basic
138
- # Handled as basic ASCII
139
- output = (output || string).gsub(Data::BASIC_ENTITY_REGEXP) {
140
- # It's safe to use the simpler [0] here because we know
141
- # that the basic entities are ASCII.
142
- '&' << Data::REVERSE_MAP[$&[0]] << ';'
143
- }
144
- when :named
145
- # Test everything except printable ASCII
146
- output = (output || string).gsub(Data::UTF8_NON_ASCII_REGEXP) {
147
- cp = $&.unpack('U')[0]
148
- (e = Data::REVERSE_MAP[cp]) ? "&#{e};" : $&
149
- }
150
- when :decimal
151
- output = (output || string).gsub(Data::UTF8_NON_ASCII_REGEXP) {
152
- "&##{$&.unpack('U')[0]};"
153
- }
154
- when :hexadecimal
155
- output = (output || string).gsub(Data::UTF8_NON_ASCII_REGEXP) {
156
- "&#x#{$&.unpack('U')[0].to_s(16)};"
157
- }
158
- end
87
+ string.gsub!(basic_entity_regexp){ __send__(basic_entity_encoder, $&) }
88
+
89
+ extended_entity_encoders = []
90
+ if instructions.include?(:named)
91
+ extended_entity_encoders << :encode_named
159
92
  end
160
- return output
93
+ if instructions.include?(:decimal)
94
+ extended_entity_encoders << :encode_decimal
95
+ elsif instructions.include?(:hexadecimal)
96
+ extended_entity_encoders << :encode_hexadecimal
97
+ end
98
+ unless extended_entity_encoders.empty?
99
+ string.gsub!(extended_entity_regexp){
100
+ encode_extended(extended_entity_encoders, $&)
101
+ }
102
+ end
103
+
104
+ return string
105
+ end
106
+
107
+ private
108
+
109
+ def map
110
+ @map ||= (require "htmlentities/#{@flavor}"; HTMLEntities::MAPPINGS[@flavor])
111
+ end
112
+
113
+ def basic_entity_regexp
114
+ @basic_entity_regexp ||= (
115
+ case @flavor
116
+ when /^html/
117
+ /[<>"&]/
118
+ else
119
+ /[<>'"&]/
120
+ end
121
+ )
161
122
  end
162
123
 
163
- extend self
124
+ def extended_entity_regexp
125
+ @extended_entity_regexp ||= (
126
+ regexp = '[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+'
127
+ regexp += "|'" if @flavor == 'html4'
128
+ Regexp.new(regexp)
129
+ )
130
+ end
131
+
132
+ def named_entity_regexp
133
+ @named_entity_regexp ||= (
134
+ min_length = map.keys.map{ |a| a.length }.min
135
+ max_length = map.keys.map{ |a| a.length }.max
136
+ /&([a-z][a-z0-9]{#{min_length-1},#{max_length-1}});/i
137
+ )
138
+ end
139
+
140
+ def reverse_map
141
+ @reverse_map ||= map.invert
142
+ end
143
+
144
+ def encode_named(char)
145
+ cp = char.unpack('U')[0]
146
+ (e = reverse_map[cp]) && "&#{e};"
147
+ end
148
+
149
+ def encode_decimal(char)
150
+ "&##{char.unpack('U')[0]};"
151
+ end
152
+
153
+ def encode_hexadecimal(char)
154
+ "&#x#{char.unpack('U')[0].to_s(16)};"
155
+ end
164
156
 
165
- end
157
+ def encode_extended(encoders, char)
158
+ encoders.each do |encoder|
159
+ encoded = __send__(encoder, char)
160
+ return encoded if encoded
161
+ end
162
+ return char
163
+ end
166
164
 
165
+ end
@@ -0,0 +1,257 @@
1
+ class HTMLEntities
2
+ MAPPINGS = {} unless defined? MAPPINGS
3
+ MAPPINGS['html4'] = {
4
+ 'Aacute' => 193,
5
+ 'aacute' => 225,
6
+ 'Acirc' => 194,
7
+ 'acirc' => 226,
8
+ 'acute' => 180,
9
+ 'AElig' => 198,
10
+ 'aelig' => 230,
11
+ 'Agrave' => 192,
12
+ 'agrave' => 224,
13
+ 'alefsym' => 8501,
14
+ 'Alpha' => 913,
15
+ 'alpha' => 945,
16
+ 'amp' => 38,
17
+ 'and' => 8743,
18
+ 'ang' => 8736,
19
+ 'Aring' => 197,
20
+ 'aring' => 229,
21
+ 'asymp' => 8776,
22
+ 'Atilde' => 195,
23
+ 'atilde' => 227,
24
+ 'Auml' => 196,
25
+ 'auml' => 228,
26
+ 'bdquo' => 8222,
27
+ 'Beta' => 914,
28
+ 'beta' => 946,
29
+ 'brvbar' => 166,
30
+ 'bull' => 8226,
31
+ 'cap' => 8745,
32
+ 'Ccedil' => 199,
33
+ 'ccedil' => 231,
34
+ 'cedil' => 184,
35
+ 'cent' => 162,
36
+ 'Chi' => 935,
37
+ 'chi' => 967,
38
+ 'circ' => 710,
39
+ 'clubs' => 9827,
40
+ 'cong' => 8773,
41
+ 'copy' => 169,
42
+ 'crarr' => 8629,
43
+ 'cup' => 8746,
44
+ 'curren' => 164,
45
+ 'Dagger' => 8225,
46
+ 'dagger' => 8224,
47
+ 'dArr' => 8659,
48
+ 'darr' => 8595,
49
+ 'deg' => 176,
50
+ 'Delta' => 916,
51
+ 'delta' => 948,
52
+ 'diams' => 9830,
53
+ 'divide' => 247,
54
+ 'Eacute' => 201,
55
+ 'eacute' => 233,
56
+ 'Ecirc' => 202,
57
+ 'ecirc' => 234,
58
+ 'Egrave' => 200,
59
+ 'egrave' => 232,
60
+ 'empty' => 8709,
61
+ 'emsp' => 8195,
62
+ 'ensp' => 8194,
63
+ 'Epsilon' => 917,
64
+ 'epsilon' => 949,
65
+ 'equiv' => 8801,
66
+ 'Eta' => 919,
67
+ 'eta' => 951,
68
+ 'ETH' => 208,
69
+ 'eth' => 240,
70
+ 'Euml' => 203,
71
+ 'euml' => 235,
72
+ 'euro' => 8364,
73
+ 'exist' => 8707,
74
+ 'fnof' => 402,
75
+ 'forall' => 8704,
76
+ 'frac12' => 189,
77
+ 'frac14' => 188,
78
+ 'frac34' => 190,
79
+ 'frasl' => 8260,
80
+ 'Gamma' => 915,
81
+ 'gamma' => 947,
82
+ 'ge' => 8805,
83
+ 'gt' => 62,
84
+ 'hArr' => 8660,
85
+ 'harr' => 8596,
86
+ 'hearts' => 9829,
87
+ 'hellip' => 8230,
88
+ 'Iacute' => 205,
89
+ 'iacute' => 237,
90
+ 'Icirc' => 206,
91
+ 'icirc' => 238,
92
+ 'iexcl' => 161,
93
+ 'Igrave' => 204,
94
+ 'igrave' => 236,
95
+ 'image' => 8465,
96
+ 'infin' => 8734,
97
+ 'int' => 8747,
98
+ 'Iota' => 921,
99
+ 'iota' => 953,
100
+ 'iquest' => 191,
101
+ 'isin' => 8712,
102
+ 'Iuml' => 207,
103
+ 'iuml' => 239,
104
+ 'Kappa' => 922,
105
+ 'kappa' => 954,
106
+ 'Lambda' => 923,
107
+ 'lambda' => 955,
108
+ 'lang' => 9001,
109
+ 'laquo' => 171,
110
+ 'lArr' => 8656,
111
+ 'larr' => 8592,
112
+ 'lceil' => 8968,
113
+ 'ldquo' => 8220,
114
+ 'le' => 8804,
115
+ 'lfloor' => 8970,
116
+ 'lowast' => 8727,
117
+ 'loz' => 9674,
118
+ 'lrm' => 8206,
119
+ 'lsaquo' => 8249,
120
+ 'lsquo' => 8216,
121
+ 'lt' => 60,
122
+ 'macr' => 175,
123
+ 'mdash' => 8212,
124
+ 'micro' => 181,
125
+ 'middot' => 183,
126
+ 'minus' => 8722,
127
+ 'Mu' => 924,
128
+ 'mu' => 956,
129
+ 'nabla' => 8711,
130
+ 'nbsp' => 160,
131
+ 'ndash' => 8211,
132
+ 'ne' => 8800,
133
+ 'ni' => 8715,
134
+ 'not' => 172,
135
+ 'notin' => 8713,
136
+ 'nsub' => 8836,
137
+ 'Ntilde' => 209,
138
+ 'ntilde' => 241,
139
+ 'Nu' => 925,
140
+ 'nu' => 957,
141
+ 'Oacute' => 211,
142
+ 'oacute' => 243,
143
+ 'Ocirc' => 212,
144
+ 'ocirc' => 244,
145
+ 'OElig' => 338,
146
+ 'oelig' => 339,
147
+ 'Ograve' => 210,
148
+ 'ograve' => 242,
149
+ 'oline' => 8254,
150
+ 'Omega' => 937,
151
+ 'omega' => 969,
152
+ 'Omicron' => 927,
153
+ 'omicron' => 959,
154
+ 'oplus' => 8853,
155
+ 'or' => 8744,
156
+ 'ordf' => 170,
157
+ 'ordm' => 186,
158
+ 'Oslash' => 216,
159
+ 'oslash' => 248,
160
+ 'Otilde' => 213,
161
+ 'otilde' => 245,
162
+ 'otimes' => 8855,
163
+ 'Ouml' => 214,
164
+ 'ouml' => 246,
165
+ 'para' => 182,
166
+ 'part' => 8706,
167
+ 'permil' => 8240,
168
+ 'perp' => 8869,
169
+ 'Phi' => 934,
170
+ 'phi' => 966,
171
+ 'Pi' => 928,
172
+ 'pi' => 960,
173
+ 'piv' => 982,
174
+ 'plusmn' => 177,
175
+ 'pound' => 163,
176
+ 'Prime' => 8243,
177
+ 'prime' => 8242,
178
+ 'prod' => 8719,
179
+ 'prop' => 8733,
180
+ 'Psi' => 936,
181
+ 'psi' => 968,
182
+ 'quot' => 34,
183
+ 'radic' => 8730,
184
+ 'rang' => 9002,
185
+ 'raquo' => 187,
186
+ 'rArr' => 8658,
187
+ 'rarr' => 8594,
188
+ 'rceil' => 8969,
189
+ 'rdquo' => 8221,
190
+ 'real' => 8476,
191
+ 'reg' => 174,
192
+ 'rfloor' => 8971,
193
+ 'Rho' => 929,
194
+ 'rho' => 961,
195
+ 'rlm' => 8207,
196
+ 'rsaquo' => 8250,
197
+ 'rsquo' => 8217,
198
+ 'sbquo' => 8218,
199
+ 'Scaron' => 352,
200
+ 'scaron' => 353,
201
+ 'sdot' => 8901,
202
+ 'sect' => 167,
203
+ 'shy' => 173,
204
+ 'Sigma' => 931,
205
+ 'sigma' => 963,
206
+ 'sigmaf' => 962,
207
+ 'sim' => 8764,
208
+ 'spades' => 9824,
209
+ 'sub' => 8834,
210
+ 'sube' => 8838,
211
+ 'sum' => 8721,
212
+ 'sup' => 8835,
213
+ 'sup1' => 185,
214
+ 'sup2' => 178,
215
+ 'sup3' => 179,
216
+ 'supe' => 8839,
217
+ 'szlig' => 223,
218
+ 'Tau' => 932,
219
+ 'tau' => 964,
220
+ 'there4' => 8756,
221
+ 'Theta' => 920,
222
+ 'theta' => 952,
223
+ 'thetasym' => 977,
224
+ 'thinsp' => 8201,
225
+ 'THORN' => 222,
226
+ 'thorn' => 254,
227
+ 'tilde' => 732,
228
+ 'times' => 215,
229
+ 'trade' => 8482,
230
+ 'Uacute' => 218,
231
+ 'uacute' => 250,
232
+ 'uArr' => 8657,
233
+ 'uarr' => 8593,
234
+ 'Ucirc' => 219,
235
+ 'ucirc' => 251,
236
+ 'Ugrave' => 217,
237
+ 'ugrave' => 249,
238
+ 'uml' => 168,
239
+ 'upsih' => 978,
240
+ 'Upsilon' => 933,
241
+ 'upsilon' => 965,
242
+ 'Uuml' => 220,
243
+ 'uuml' => 252,
244
+ 'weierp' => 8472,
245
+ 'Xi' => 926,
246
+ 'xi' => 958,
247
+ 'Yacute' => 221,
248
+ 'yacute' => 253,
249
+ 'yen' => 165,
250
+ 'Yuml' => 376,
251
+ 'yuml' => 255,
252
+ 'Zeta' => 918,
253
+ 'zeta' => 950,
254
+ 'zwj' => 8205,
255
+ 'zwnj' => 8204
256
+ }
257
+ end