htmlentities 3.0.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -1,3 +1,18 @@
1
+ == 4.0.0 (2007-03-15)
2
+ * New instantiation-based interface (but legacy interface is preserved for
3
+ compatibility).
4
+ * Handles HTML4 as well as XHTML1 (the former lacks the &apos; entity).
5
+ * Encodes basic entities numerically when :basic isn't specified and :decimal
6
+ or :hexadecimal is.
7
+ * Performs a maximum of two gsub passes instead of three when encoding, which
8
+ should be more efficient on long strings.
9
+
10
+ == 3.1.0 (2007-01-19)
11
+ * Now understands all the entities referred to in the XHTML 1.0 DTD (253
12
+ entities compared with 131 in version 3.0.1).
13
+ * Calls to_s on parameters to play nicely with Rails 1.2.1.
14
+ * Entity mapping data is now lazily loaded.
15
+
1
16
  == 3.0.1 (2005-04-08)
2
17
  * Improved documentation.
3
18
 
@@ -0,0 +1,44 @@
1
+ == HTMLEntities
2
+
3
+ HTML entity encoding and decoding for Ruby
4
+
5
+ The HTMLEntities module facilitates encoding and decoding of
6
+ (X)HTML entities from/to their corresponding UTF-8 codepoints.
7
+
8
+ To install (requires root/admin privileges):
9
+
10
+ ruby setup.rb
11
+
12
+ Alternatively, you can just use the gem.
13
+
14
+ == Licence
15
+
16
+ This code is free to use under the terms of the MIT licence:
17
+
18
+ Copyright (c) 2007 Paul Battley
19
+
20
+ Permission is hereby granted, free of charge, to any person obtaining a copy
21
+ of this software and associated documentation files (the "Software"), to
22
+ deal in the Software without restriction, including without limitation the
23
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
24
+ sell copies of the Software, and to permit persons to whom the Software is
25
+ furnished to do so, subject to the following conditions:
26
+
27
+ The above copyright notice and this permission notice shall be included in
28
+ all copies or substantial portions of the Software.
29
+
30
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
31
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
32
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
33
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
34
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
35
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
36
+ IN THE SOFTWARE.
37
+
38
+ If you'd like to negotiate a different licence for a specific use, just
39
+ contact me -- I'll almost certainly permit it.
40
+
41
+ == Contact
42
+
43
+ Comments are welcome. Send an email to pbattley@gmail.com.
44
+
@@ -1,102 +1,47 @@
1
+ require 'htmlentities/legacy'
2
+
1
3
  #
2
4
  # HTML entity encoding and decoding for Ruby
3
5
  #
4
6
 
5
- module HTMLEntities
6
-
7
- class InstructionError < RuntimeError
8
- end
9
-
10
- module Data #:nodoc:
11
-
12
- #
13
- # MAP is a hash of all the HTML entities I could discover, as taken
14
- # from the w3schools page on the subject:
15
- # http://www.w3schools.com/html/html_entitiesref.asp
16
- # The format is 'entity name' => codepoint where entity name is given
17
- # without the surrounding ampersand and semicolon.
18
- #
19
- MAP = {
20
- 'quot' => 34, 'apos' => 39, 'amp' => 38,
21
- 'lt' => 60, 'gt' => 62, 'nbsp' => 160,
22
- 'iexcl' => 161, 'curren' => 164, 'cent' => 162,
23
- 'pound' => 163, 'yen' => 165, 'brvbar' => 166,
24
- 'sect' => 167, 'uml' => 168, 'copy' => 169,
25
- 'ordf' => 170, 'laquo' => 171, 'not' => 172,
26
- 'shy' => 173, 'reg' => 174, 'trade' => 8482,
27
- 'macr' => 175, 'deg' => 176, 'plusmn' => 177,
28
- 'sup2' => 178, 'sup3' => 179, 'acute' => 180,
29
- 'micro' => 181, 'para' => 182, 'middot' => 183,
30
- 'cedil' => 184, 'sup1' => 185, 'ordm' => 186,
31
- 'raquo' => 187, 'frac14' => 188, 'frac12' => 189,
32
- 'frac34' => 190, 'iquest' => 191, 'times' => 215,
33
- 'divide' => 247, 'Agrave' => 192, 'Aacute' => 193,
34
- 'Acirc' => 194, 'Atilde' => 195, 'Auml' => 196,
35
- 'Aring' => 197, 'AElig' => 198, 'Ccedil' => 199,
36
- 'Egrave' => 200, 'Eacute' => 201, 'Ecirc' => 202,
37
- 'Euml' => 203, 'Igrave' => 204, 'Iacute' => 205,
38
- 'Icirc' => 206, 'Iuml' => 207, 'ETH' => 208,
39
- 'Ntilde' => 209, 'Ograve' => 210, 'Oacute' => 211,
40
- 'Ocirc' => 212, 'Otilde' => 213, 'Ouml' => 214,
41
- 'Oslash' => 216, 'Ugrave' => 217, 'Uacute' => 218,
42
- 'Ucirc' => 219, 'Uuml' => 220, 'Yacute' => 221,
43
- 'THORN' => 222, 'szlig' => 223, 'agrave' => 224,
44
- 'aacute' => 225, 'acirc' => 226, 'atilde' => 227,
45
- 'auml' => 228, 'aring' => 229, 'aelig' => 230,
46
- 'ccedil' => 231, 'egrave' => 232, 'eacute' => 233,
47
- 'ecirc' => 234, 'euml' => 235, 'igrave' => 236,
48
- 'iacute' => 237, 'icirc' => 238, 'iuml' => 239,
49
- 'eth' => 240, 'ntilde' => 241, 'ograve' => 242,
50
- 'oacute' => 243, 'ocirc' => 244, 'otilde' => 245,
51
- 'ouml' => 246, 'oslash' => 248, 'ugrave' => 249,
52
- 'uacute' => 250, 'ucirc' => 251, 'uuml' => 252,
53
- 'yacute' => 253, 'thorn' => 254, 'yuml' => 255,
54
- 'OElig' => 338, 'oelig' => 339, 'Scaron' => 352,
55
- 'scaron' => 353, 'Yuml' => 376, 'circ' => 710,
56
- 'tilde' => 732, 'ensp' => 8194, 'emsp' => 8195,
57
- 'thinsp' => 8201, 'zwnj' => 8204, 'zwj' => 8205,
58
- 'lrm' => 8206, 'rlm' => 8207, 'ndash' => 8211,
59
- 'mdash' => 8212, 'lsquo' => 8216, 'rsquo' => 8217,
60
- 'sbquo' => 8218, 'ldquo' => 8220, 'rdquo' => 8221,
61
- 'bdquo' => 8222, 'dagger' => 8224, 'Dagger' => 8225,
62
- 'hellip' => 8230, 'permil' => 8240, 'lsaquo' => 8249,
63
- 'rsaquo' => 8250, 'euro' => 8364
64
- }
7
+ class HTMLEntities
65
8
 
66
- MIN_LENGTH = MAP.keys.map{ |a| a.length }.min
67
- MAX_LENGTH = MAP.keys.map{ |a| a.length }.max
68
- NAMED_ENTITY_REGEXP = /&([a-z]{#{MIN_LENGTH},#{MAX_LENGTH}});/i
69
- REVERSE_MAP = MAP.invert
9
+ VERSION = '4.0.0'
10
+ FLAVORS = %w[html4 xhtml1]
11
+ INSTRUCTIONS = [:basic, :named, :decimal, :hexadecimal]
70
12
 
71
- BASIC_ENTITY_REGEXP = /[<>'"&]/
72
-
73
- UTF8_NON_ASCII_REGEXP = /[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+/
13
+ class InstructionError < RuntimeError
14
+ end
15
+ class UnknownFlavor < RuntimeError
16
+ end
74
17
 
75
- ENCODE_ENTITIES_COMMAND_ORDER = {
76
- :basic => 0,
77
- :named => 1,
78
- :decimal => 2,
79
- :hexadecimal => 3
80
- }
81
-
18
+ #
19
+ # Create a new HTMLEntities coder for the specified flavor.
20
+ # Available flavors are 'html4' and 'xhtml1' (the default).
21
+ # The only difference in functionality between the two is in the handling of the apos
22
+ # (apostrophe) named entity, which is not defined in HTML4.
23
+ #
24
+ def initialize(flavor='xhtml1')
25
+ @flavor = flavor.to_s.downcase
26
+ raise UnknownFlavor, "Unknown flavor #{flavor}" unless FLAVORS.include?(@flavor)
82
27
  end
83
-
28
+
84
29
  #
85
- # Decode XML and HTML 4.01 entities in a string into their UTF-8
30
+ # Decode entities in a string into their UTF-8
86
31
  # equivalents. Obviously, if your string is not already in UTF-8, you'd
87
32
  # better convert it before using this method, or the output will be mixed
88
33
  # up.
89
34
  #
90
- # Unknown named entities are not converted
35
+ # Unknown named entities will not be converted
91
36
  #
92
- def decode_entities(string)
93
- return string.gsub(Data::NAMED_ENTITY_REGEXP) {
94
- (cp = Data::MAP[$1]) ? [cp].pack('U') : $&
95
- }.gsub(/&#([0-9]{1,7});|&#x([0-9a-f]{1,6});/i) {
96
- $1 ? [$1.to_i].pack('U') : [$2.to_i(16)].pack('U')
37
+ def decode(source)
38
+ return source.to_s.gsub(named_entity_regexp) {
39
+ (cp = map[$1]) ? [cp].pack('U') : $&
40
+ }.gsub(/&#([0-9]{1,7});|&#x([0-9a-f]{1,6});/i) {
41
+ $1 ? [$1.to_i].pack('U') : [$2.to_i(16)].pack('U')
97
42
  }
98
43
  end
99
-
44
+
100
45
  #
101
46
  # Encode codepoints into their corresponding entities. Various operations
102
47
  # are possible, and may be specified in order:
@@ -119,48 +64,102 @@ module HTMLEntities
119
64
  # non-ASCII characters replaced with their named entity where possible, and
120
65
  # decimal equivalents otherwise.
121
66
  #
122
- # Note: It is the program's responsibility to ensure that the string
67
+ # Note: It is the program's responsibility to ensure that the source
123
68
  # contains valid UTF-8 before calling this method.
124
69
  #
125
- def encode_entities(string, *instructions)
126
- output = nil
70
+ def encode(source, *instructions)
71
+ string = source.to_s.dup
127
72
  if (instructions.empty?)
128
- instructions = [:basic]
129
- else
130
- instructions = instructions.sort_by { |instruction|
131
- Data::ENCODE_ENTITIES_COMMAND_ORDER[instruction] ||
132
- (raise InstructionError, "unknown encode_entities command `#{instruction.inspect}'")
133
- }
73
+ instructions = [:basic]
74
+ elsif (unknown_instructions = instructions - INSTRUCTIONS) != []
75
+ raise InstructionError,
76
+ "unknown encode_entities command(s): #{unknown_instructions.inspect}"
77
+ end
78
+
79
+ basic_entity_encoder =
80
+ if instructions.include?(:basic) || instructions.include?(:named)
81
+ :encode_named
82
+ elsif instructions.include?(:decimal)
83
+ :encode_decimal
84
+ else instructions.include?(:hexadecimal)
85
+ :encode_hexadecimal
134
86
  end
135
- instructions.each do |instruction|
136
- case instruction
137
- when :basic
138
- # Handled as basic ASCII
139
- output = (output || string).gsub(Data::BASIC_ENTITY_REGEXP) {
140
- # It's safe to use the simpler [0] here because we know
141
- # that the basic entities are ASCII.
142
- '&' << Data::REVERSE_MAP[$&[0]] << ';'
143
- }
144
- when :named
145
- # Test everything except printable ASCII
146
- output = (output || string).gsub(Data::UTF8_NON_ASCII_REGEXP) {
147
- cp = $&.unpack('U')[0]
148
- (e = Data::REVERSE_MAP[cp]) ? "&#{e};" : $&
149
- }
150
- when :decimal
151
- output = (output || string).gsub(Data::UTF8_NON_ASCII_REGEXP) {
152
- "&##{$&.unpack('U')[0]};"
153
- }
154
- when :hexadecimal
155
- output = (output || string).gsub(Data::UTF8_NON_ASCII_REGEXP) {
156
- "&#x#{$&.unpack('U')[0].to_s(16)};"
157
- }
158
- end
87
+ string.gsub!(basic_entity_regexp){ __send__(basic_entity_encoder, $&) }
88
+
89
+ extended_entity_encoders = []
90
+ if instructions.include?(:named)
91
+ extended_entity_encoders << :encode_named
159
92
  end
160
- return output
93
+ if instructions.include?(:decimal)
94
+ extended_entity_encoders << :encode_decimal
95
+ elsif instructions.include?(:hexadecimal)
96
+ extended_entity_encoders << :encode_hexadecimal
97
+ end
98
+ unless extended_entity_encoders.empty?
99
+ string.gsub!(extended_entity_regexp){
100
+ encode_extended(extended_entity_encoders, $&)
101
+ }
102
+ end
103
+
104
+ return string
105
+ end
106
+
107
+ private
108
+
109
+ def map
110
+ @map ||= (require "htmlentities/#{@flavor}"; HTMLEntities::MAPPINGS[@flavor])
111
+ end
112
+
113
+ def basic_entity_regexp
114
+ @basic_entity_regexp ||= (
115
+ case @flavor
116
+ when /^html/
117
+ /[<>"&]/
118
+ else
119
+ /[<>'"&]/
120
+ end
121
+ )
161
122
  end
162
123
 
163
- extend self
124
+ def extended_entity_regexp
125
+ @extended_entity_regexp ||= (
126
+ regexp = '[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+'
127
+ regexp += "|'" if @flavor == 'html4'
128
+ Regexp.new(regexp)
129
+ )
130
+ end
131
+
132
+ def named_entity_regexp
133
+ @named_entity_regexp ||= (
134
+ min_length = map.keys.map{ |a| a.length }.min
135
+ max_length = map.keys.map{ |a| a.length }.max
136
+ /&([a-z][a-z0-9]{#{min_length-1},#{max_length-1}});/i
137
+ )
138
+ end
139
+
140
+ def reverse_map
141
+ @reverse_map ||= map.invert
142
+ end
143
+
144
+ def encode_named(char)
145
+ cp = char.unpack('U')[0]
146
+ (e = reverse_map[cp]) && "&#{e};"
147
+ end
148
+
149
+ def encode_decimal(char)
150
+ "&##{char.unpack('U')[0]};"
151
+ end
152
+
153
+ def encode_hexadecimal(char)
154
+ "&#x#{char.unpack('U')[0].to_s(16)};"
155
+ end
164
156
 
165
- end
157
+ def encode_extended(encoders, char)
158
+ encoders.each do |encoder|
159
+ encoded = __send__(encoder, char)
160
+ return encoded if encoded
161
+ end
162
+ return char
163
+ end
166
164
 
165
+ end
@@ -0,0 +1,257 @@
1
+ class HTMLEntities
2
+ MAPPINGS = {} unless defined? MAPPINGS
3
+ MAPPINGS['html4'] = {
4
+ 'Aacute' => 193,
5
+ 'aacute' => 225,
6
+ 'Acirc' => 194,
7
+ 'acirc' => 226,
8
+ 'acute' => 180,
9
+ 'AElig' => 198,
10
+ 'aelig' => 230,
11
+ 'Agrave' => 192,
12
+ 'agrave' => 224,
13
+ 'alefsym' => 8501,
14
+ 'Alpha' => 913,
15
+ 'alpha' => 945,
16
+ 'amp' => 38,
17
+ 'and' => 8743,
18
+ 'ang' => 8736,
19
+ 'Aring' => 197,
20
+ 'aring' => 229,
21
+ 'asymp' => 8776,
22
+ 'Atilde' => 195,
23
+ 'atilde' => 227,
24
+ 'Auml' => 196,
25
+ 'auml' => 228,
26
+ 'bdquo' => 8222,
27
+ 'Beta' => 914,
28
+ 'beta' => 946,
29
+ 'brvbar' => 166,
30
+ 'bull' => 8226,
31
+ 'cap' => 8745,
32
+ 'Ccedil' => 199,
33
+ 'ccedil' => 231,
34
+ 'cedil' => 184,
35
+ 'cent' => 162,
36
+ 'Chi' => 935,
37
+ 'chi' => 967,
38
+ 'circ' => 710,
39
+ 'clubs' => 9827,
40
+ 'cong' => 8773,
41
+ 'copy' => 169,
42
+ 'crarr' => 8629,
43
+ 'cup' => 8746,
44
+ 'curren' => 164,
45
+ 'Dagger' => 8225,
46
+ 'dagger' => 8224,
47
+ 'dArr' => 8659,
48
+ 'darr' => 8595,
49
+ 'deg' => 176,
50
+ 'Delta' => 916,
51
+ 'delta' => 948,
52
+ 'diams' => 9830,
53
+ 'divide' => 247,
54
+ 'Eacute' => 201,
55
+ 'eacute' => 233,
56
+ 'Ecirc' => 202,
57
+ 'ecirc' => 234,
58
+ 'Egrave' => 200,
59
+ 'egrave' => 232,
60
+ 'empty' => 8709,
61
+ 'emsp' => 8195,
62
+ 'ensp' => 8194,
63
+ 'Epsilon' => 917,
64
+ 'epsilon' => 949,
65
+ 'equiv' => 8801,
66
+ 'Eta' => 919,
67
+ 'eta' => 951,
68
+ 'ETH' => 208,
69
+ 'eth' => 240,
70
+ 'Euml' => 203,
71
+ 'euml' => 235,
72
+ 'euro' => 8364,
73
+ 'exist' => 8707,
74
+ 'fnof' => 402,
75
+ 'forall' => 8704,
76
+ 'frac12' => 189,
77
+ 'frac14' => 188,
78
+ 'frac34' => 190,
79
+ 'frasl' => 8260,
80
+ 'Gamma' => 915,
81
+ 'gamma' => 947,
82
+ 'ge' => 8805,
83
+ 'gt' => 62,
84
+ 'hArr' => 8660,
85
+ 'harr' => 8596,
86
+ 'hearts' => 9829,
87
+ 'hellip' => 8230,
88
+ 'Iacute' => 205,
89
+ 'iacute' => 237,
90
+ 'Icirc' => 206,
91
+ 'icirc' => 238,
92
+ 'iexcl' => 161,
93
+ 'Igrave' => 204,
94
+ 'igrave' => 236,
95
+ 'image' => 8465,
96
+ 'infin' => 8734,
97
+ 'int' => 8747,
98
+ 'Iota' => 921,
99
+ 'iota' => 953,
100
+ 'iquest' => 191,
101
+ 'isin' => 8712,
102
+ 'Iuml' => 207,
103
+ 'iuml' => 239,
104
+ 'Kappa' => 922,
105
+ 'kappa' => 954,
106
+ 'Lambda' => 923,
107
+ 'lambda' => 955,
108
+ 'lang' => 9001,
109
+ 'laquo' => 171,
110
+ 'lArr' => 8656,
111
+ 'larr' => 8592,
112
+ 'lceil' => 8968,
113
+ 'ldquo' => 8220,
114
+ 'le' => 8804,
115
+ 'lfloor' => 8970,
116
+ 'lowast' => 8727,
117
+ 'loz' => 9674,
118
+ 'lrm' => 8206,
119
+ 'lsaquo' => 8249,
120
+ 'lsquo' => 8216,
121
+ 'lt' => 60,
122
+ 'macr' => 175,
123
+ 'mdash' => 8212,
124
+ 'micro' => 181,
125
+ 'middot' => 183,
126
+ 'minus' => 8722,
127
+ 'Mu' => 924,
128
+ 'mu' => 956,
129
+ 'nabla' => 8711,
130
+ 'nbsp' => 160,
131
+ 'ndash' => 8211,
132
+ 'ne' => 8800,
133
+ 'ni' => 8715,
134
+ 'not' => 172,
135
+ 'notin' => 8713,
136
+ 'nsub' => 8836,
137
+ 'Ntilde' => 209,
138
+ 'ntilde' => 241,
139
+ 'Nu' => 925,
140
+ 'nu' => 957,
141
+ 'Oacute' => 211,
142
+ 'oacute' => 243,
143
+ 'Ocirc' => 212,
144
+ 'ocirc' => 244,
145
+ 'OElig' => 338,
146
+ 'oelig' => 339,
147
+ 'Ograve' => 210,
148
+ 'ograve' => 242,
149
+ 'oline' => 8254,
150
+ 'Omega' => 937,
151
+ 'omega' => 969,
152
+ 'Omicron' => 927,
153
+ 'omicron' => 959,
154
+ 'oplus' => 8853,
155
+ 'or' => 8744,
156
+ 'ordf' => 170,
157
+ 'ordm' => 186,
158
+ 'Oslash' => 216,
159
+ 'oslash' => 248,
160
+ 'Otilde' => 213,
161
+ 'otilde' => 245,
162
+ 'otimes' => 8855,
163
+ 'Ouml' => 214,
164
+ 'ouml' => 246,
165
+ 'para' => 182,
166
+ 'part' => 8706,
167
+ 'permil' => 8240,
168
+ 'perp' => 8869,
169
+ 'Phi' => 934,
170
+ 'phi' => 966,
171
+ 'Pi' => 928,
172
+ 'pi' => 960,
173
+ 'piv' => 982,
174
+ 'plusmn' => 177,
175
+ 'pound' => 163,
176
+ 'Prime' => 8243,
177
+ 'prime' => 8242,
178
+ 'prod' => 8719,
179
+ 'prop' => 8733,
180
+ 'Psi' => 936,
181
+ 'psi' => 968,
182
+ 'quot' => 34,
183
+ 'radic' => 8730,
184
+ 'rang' => 9002,
185
+ 'raquo' => 187,
186
+ 'rArr' => 8658,
187
+ 'rarr' => 8594,
188
+ 'rceil' => 8969,
189
+ 'rdquo' => 8221,
190
+ 'real' => 8476,
191
+ 'reg' => 174,
192
+ 'rfloor' => 8971,
193
+ 'Rho' => 929,
194
+ 'rho' => 961,
195
+ 'rlm' => 8207,
196
+ 'rsaquo' => 8250,
197
+ 'rsquo' => 8217,
198
+ 'sbquo' => 8218,
199
+ 'Scaron' => 352,
200
+ 'scaron' => 353,
201
+ 'sdot' => 8901,
202
+ 'sect' => 167,
203
+ 'shy' => 173,
204
+ 'Sigma' => 931,
205
+ 'sigma' => 963,
206
+ 'sigmaf' => 962,
207
+ 'sim' => 8764,
208
+ 'spades' => 9824,
209
+ 'sub' => 8834,
210
+ 'sube' => 8838,
211
+ 'sum' => 8721,
212
+ 'sup' => 8835,
213
+ 'sup1' => 185,
214
+ 'sup2' => 178,
215
+ 'sup3' => 179,
216
+ 'supe' => 8839,
217
+ 'szlig' => 223,
218
+ 'Tau' => 932,
219
+ 'tau' => 964,
220
+ 'there4' => 8756,
221
+ 'Theta' => 920,
222
+ 'theta' => 952,
223
+ 'thetasym' => 977,
224
+ 'thinsp' => 8201,
225
+ 'THORN' => 222,
226
+ 'thorn' => 254,
227
+ 'tilde' => 732,
228
+ 'times' => 215,
229
+ 'trade' => 8482,
230
+ 'Uacute' => 218,
231
+ 'uacute' => 250,
232
+ 'uArr' => 8657,
233
+ 'uarr' => 8593,
234
+ 'Ucirc' => 219,
235
+ 'ucirc' => 251,
236
+ 'Ugrave' => 217,
237
+ 'ugrave' => 249,
238
+ 'uml' => 168,
239
+ 'upsih' => 978,
240
+ 'Upsilon' => 933,
241
+ 'upsilon' => 965,
242
+ 'Uuml' => 220,
243
+ 'uuml' => 252,
244
+ 'weierp' => 8472,
245
+ 'Xi' => 926,
246
+ 'xi' => 958,
247
+ 'Yacute' => 221,
248
+ 'yacute' => 253,
249
+ 'yen' => 165,
250
+ 'Yuml' => 376,
251
+ 'yuml' => 255,
252
+ 'Zeta' => 918,
253
+ 'zeta' => 950,
254
+ 'zwj' => 8205,
255
+ 'zwnj' => 8204
256
+ }
257
+ end