feedtools 0.2.26 → 0.2.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166)
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,17 @@
1
# Extends String#% so that templates written with Python-style named
# placeholders ("... %(name) ...") can be filled in from a Hash. Any other
# argument type is handed to Ruby's built-in formatting unchanged.
class String
  # Keep a handle on the built-in implementation. The guard prevents a second
  # load of this file from aliasing the override onto itself, which would make
  # the non-Hash branch recurse without end.
  alias_method(:old_format, :%) unless method_defined?(:old_format)

  # data - either a Hash mapping placeholder names to values, or whatever the
  #        built-in String#% accepts (a single value or an Array).
  #
  # Returns a new String. The receiver is never modified, so frozen templates
  # are safe to use.
  define_method("%") do |data|
    if data.kind_of?(Hash)
      # Substitute every "%(key)" occurrence. The key is escaped so regexp
      # metacharacters in it match literally, and the block form of gsub
      # inserts the value verbatim (no "\1"-style back-reference expansion).
      # Values are stringified so non-String values can be substituted too.
      data.inject(dup) do |text, (key, value)|
        text.gsub(/\%\(#{Regexp.escape(key.to_s)}\)/) { value.to_s }
      end
    else
      # Verbose warnings are switched off around the built-in call
      # (presumably to hide format-string warnings). The previous setting is
      # always put back, even when formatting raises, instead of being
      # forced to true.
      saved_verbose = $VERBOSE
      begin
        $VERBOSE = false if saved_verbose
        old_format(data)
      ensure
        $VERBOSE = saved_verbose
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'html5/html5parser'
2
+ require 'html5/version'
3
+
4
# Convenience entry points that forward to HTMLParser, so callers can write
# HTML5.parse(...) / HTML5.parse_fragment(...) without naming the parser class.
module HTML5

  # Parses +stream+ as a complete document.
  #
  # stream  - the input, passed through to HTMLParser.parse untouched.
  # options - optional Hash, passed through to HTMLParser.parse untouched.
  #
  # Returns whatever HTMLParser.parse returns.
  def self.parse(stream, options={})
    HTMLParser.parse(stream, options)
  end

  # Parses +stream+ as a fragment rather than a whole document.
  #
  # The original body was a copy of .parse and called HTMLParser.parse, so
  # fragment parsing was never reached. HTMLParser is defined outside this
  # file, so the fragment entry point is used when the parser class provides
  # one, and the previous behaviour is kept as a fallback otherwise.
  #
  # stream  - the input, passed through untouched.
  # options - optional Hash, passed through untouched.
  #
  # Returns whatever the selected HTMLParser class method returns.
  def self.parse_fragment(stream, options={})
    if HTMLParser.respond_to?(:parse_fragment)
      HTMLParser.parse_fragment(stream, options)
    else
      HTMLParser.parse(stream, options)
    end
  end
end
@@ -0,0 +1,1046 @@
1
+ module HTML5
2
+
3
+ class EOF < Exception; end
4
+
5
+ def self._(str); str end
6
+
7
+ CONTENT_MODEL_FLAGS = [
8
+ :PCDATA,
9
+ :RCDATA,
10
+ :CDATA,
11
+ :PLAINTEXT
12
+ ]
13
+
14
+ SCOPING_ELEMENTS = %w[
15
+ button
16
+ caption
17
+ html
18
+ marquee
19
+ object
20
+ table
21
+ td
22
+ th
23
+ ]
24
+
25
+ FORMATTING_ELEMENTS = %w[
26
+ a
27
+ b
28
+ big
29
+ em
30
+ font
31
+ i
32
+ nobr
33
+ s
34
+ small
35
+ strike
36
+ strong
37
+ tt
38
+ u
39
+ ]
40
+
41
+ SPECIAL_ELEMENTS = %w[
42
+ address
43
+ area
44
+ base
45
+ basefont
46
+ bgsound
47
+ blockquote
48
+ body
49
+ br
50
+ center
51
+ col
52
+ colgroup
53
+ dd
54
+ dir
55
+ div
56
+ dl
57
+ dt
58
+ embed
59
+ fieldset
60
+ form
61
+ frame
62
+ frameset
63
+ h1
64
+ h2
65
+ h3
66
+ h4
67
+ h5
68
+ h6
69
+ head
70
+ hr
71
+ iframe
72
+ image
73
+ img
74
+ input
75
+ isindex
76
+ li
77
+ link
78
+ listing
79
+ menu
80
+ meta
81
+ noembed
82
+ noframes
83
+ noscript
84
+ ol
85
+ optgroup
86
+ option
87
+ p
88
+ param
89
+ plaintext
90
+ pre
91
+ script
92
+ select
93
+ spacer
94
+ style
95
+ tbody
96
+ textarea
97
+ tfoot
98
+ thead
99
+ title
100
+ tr
101
+ ul
102
+ wbr
103
+ ]
104
+
105
+ SPACE_CHARACTERS = %W[
106
+ \t
107
+ \n
108
+ \x0B
109
+ \x0C
110
+ \x20
111
+ \r
112
+ ]
113
+
114
+ TABLE_INSERT_MODE_ELEMENTS = %w[
115
+ table
116
+ tbody
117
+ tfoot
118
+ thead
119
+ tr
120
+ ]
121
+
122
+ ASCII_LOWERCASE = ('a'..'z').to_a.join('')
123
+ ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
124
+ ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
125
+ DIGITS = '0'..'9'
126
+ HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
127
+
128
+ # Heading elements need to be ordered
129
+ HEADING_ELEMENTS = %w[
130
+ h1
131
+ h2
132
+ h3
133
+ h4
134
+ h5
135
+ h6
136
+ ]
137
+
138
+ # XXX What about event-source and command?
139
+ VOID_ELEMENTS = %w[
140
+ base
141
+ link
142
+ meta
143
+ hr
144
+ br
145
+ img
146
+ embed
147
+ param
148
+ area
149
+ col
150
+ input
151
+ ]
152
+
153
+ CDATA_ELEMENTS = %w[title textarea]
154
+
155
+ RCDATA_ELEMENTS = %w[
156
+ style
157
+ script
158
+ xmp
159
+ iframe
160
+ noembed
161
+ noframes
162
+ noscript
163
+ ]
164
+
165
+ BOOLEAN_ATTRIBUTES = {
166
+ :global => %w[irrelevant],
167
+ 'style' => %w[scoped],
168
+ 'img' => %w[ismap],
169
+ 'audio' => %w[autoplay controls],
170
+ 'video' => %w[autoplay controls],
171
+ 'script' => %w[defer async],
172
+ 'details' => %w[open],
173
+ 'datagrid' => %w[multiple disabled],
174
+ 'command' => %w[hidden disabled checked default],
175
+ 'menu' => %w[autosubmit],
176
+ 'fieldset' => %w[disabled readonly],
177
+ 'option' => %w[disabled readonly selected],
178
+ 'optgroup' => %w[disabled readonly],
179
+ 'button' => %w[disabled autofocus],
180
+ 'input' => %w[disabled readonly required autofocus checked ismap],
181
+ 'select' => %w[disabled readonly autofocus multiple],
182
+ 'output' => %w[disabled readonly]
183
+
184
+ }
185
+
186
+ # entitiesWindows1252 has to be _ordered_ and needs to have an index.
187
+ ENTITIES_WINDOWS1252 = [
188
+ 8364, # 0x80 0x20AC EURO SIGN
189
+ 65533, # 0x81 UNDEFINED
190
+ 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
191
+ 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
192
+ 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
193
+ 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
194
+ 8224, # 0x86 0x2020 DAGGER
195
+ 8225, # 0x87 0x2021 DOUBLE DAGGER
196
+ 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
197
+ 8240, # 0x89 0x2030 PER MILLE SIGN
198
+ 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
199
+ 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
200
+ 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
201
+ 65533, # 0x8D UNDEFINED
202
+ 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
203
+ 65533, # 0x8F UNDEFINED
204
+ 65533, # 0x90 UNDEFINED
205
+ 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
206
+ 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
207
+ 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
208
+ 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
209
+ 8226, # 0x95 0x2022 BULLET
210
+ 8211, # 0x96 0x2013 EN DASH
211
+ 8212, # 0x97 0x2014 EM DASH
212
+ 732, # 0x98 0x02DC SMALL TILDE
213
+ 8482, # 0x99 0x2122 TRADE MARK SIGN
214
+ 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
215
+ 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
216
+ 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
217
+ 65533, # 0x9D UNDEFINED
218
+ 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
219
+ 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
220
+ ]
221
+
222
+ # ENTITIES was generated from Python using the following code:
223
+ #
224
+ # import constants
225
+ # entities = constants.entities.items()
226
+ # entities.sort()
227
+ # list = [ ' '.join([repr(entity), '=>', ord(value)<128 and
228
+ # repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
229
+ # for entity, value in entities]
230
+ # print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
231
+
232
+ ENTITIES = {
233
+ 'AElig' => "\xc3\x86",
234
+ 'AElig;' => "\xc3\x86",
235
+ 'AMP' => '&',
236
+ 'AMP;' => '&',
237
+ 'Aacute' => "\xc3\x81",
238
+ 'Aacute;' => "\xc3\x81",
239
+ 'Acirc' => "\xc3\x82",
240
+ 'Acirc;' => "\xc3\x82",
241
+ 'Agrave' => "\xc3\x80",
242
+ 'Agrave;' => "\xc3\x80",
243
+ 'Alpha;' => "\xce\x91",
244
+ 'Aring' => "\xc3\x85",
245
+ 'Aring;' => "\xc3\x85",
246
+ 'Atilde' => "\xc3\x83",
247
+ 'Atilde;' => "\xc3\x83",
248
+ 'Auml' => "\xc3\x84",
249
+ 'Auml;' => "\xc3\x84",
250
+ 'Beta;' => "\xce\x92",
251
+ 'COPY' => "\xc2\xa9",
252
+ 'COPY;' => "\xc2\xa9",
253
+ 'Ccedil' => "\xc3\x87",
254
+ 'Ccedil;' => "\xc3\x87",
255
+ 'Chi;' => "\xce\xa7",
256
+ 'Dagger;' => "\xe2\x80\xa1",
257
+ 'Delta;' => "\xce\x94",
258
+ 'ETH' => "\xc3\x90",
259
+ 'ETH;' => "\xc3\x90",
260
+ 'Eacute' => "\xc3\x89",
261
+ 'Eacute;' => "\xc3\x89",
262
+ 'Ecirc' => "\xc3\x8a",
263
+ 'Ecirc;' => "\xc3\x8a",
264
+ 'Egrave' => "\xc3\x88",
265
+ 'Egrave;' => "\xc3\x88",
266
+ 'Epsilon;' => "\xce\x95",
267
+ 'Eta;' => "\xce\x97",
268
+ 'Euml' => "\xc3\x8b",
269
+ 'Euml;' => "\xc3\x8b",
270
+ 'GT' => '>',
271
+ 'GT;' => '>',
272
+ 'Gamma;' => "\xce\x93",
273
+ 'Iacute' => "\xc3\x8d",
274
+ 'Iacute;' => "\xc3\x8d",
275
+ 'Icirc' => "\xc3\x8e",
276
+ 'Icirc;' => "\xc3\x8e",
277
+ 'Igrave' => "\xc3\x8c",
278
+ 'Igrave;' => "\xc3\x8c",
279
+ 'Iota;' => "\xce\x99",
280
+ 'Iuml' => "\xc3\x8f",
281
+ 'Iuml;' => "\xc3\x8f",
282
+ 'Kappa;' => "\xce\x9a",
283
+ 'LT' => '<',
284
+ 'LT;' => '<',
285
+ 'Lambda;' => "\xce\x9b",
286
+ 'Mu;' => "\xce\x9c",
287
+ 'Ntilde' => "\xc3\x91",
288
+ 'Ntilde;' => "\xc3\x91",
289
+ 'Nu;' => "\xce\x9d",
290
+ 'OElig;' => "\xc5\x92",
291
+ 'Oacute' => "\xc3\x93",
292
+ 'Oacute;' => "\xc3\x93",
293
+ 'Ocirc' => "\xc3\x94",
294
+ 'Ocirc;' => "\xc3\x94",
295
+ 'Ograve' => "\xc3\x92",
296
+ 'Ograve;' => "\xc3\x92",
297
+ 'Omega;' => "\xce\xa9",
298
+ 'Omicron;' => "\xce\x9f",
299
+ 'Oslash' => "\xc3\x98",
300
+ 'Oslash;' => "\xc3\x98",
301
+ 'Otilde' => "\xc3\x95",
302
+ 'Otilde;' => "\xc3\x95",
303
+ 'Ouml' => "\xc3\x96",
304
+ 'Ouml;' => "\xc3\x96",
305
+ 'Phi;' => "\xce\xa6",
306
+ 'Pi;' => "\xce\xa0",
307
+ 'Prime;' => "\xe2\x80\xb3",
308
+ 'Psi;' => "\xce\xa8",
309
+ 'QUOT' => '"',
310
+ 'QUOT;' => '"',
311
+ 'REG' => "\xc2\xae",
312
+ 'REG;' => "\xc2\xae",
313
+ 'Rho;' => "\xce\xa1",
314
+ 'Scaron;' => "\xc5\xa0",
315
+ 'Sigma;' => "\xce\xa3",
316
+ 'THORN' => "\xc3\x9e",
317
+ 'THORN;' => "\xc3\x9e",
318
+ 'TRADE;' => "\xe2\x84\xa2",
319
+ 'Tau;' => "\xce\xa4",
320
+ 'Theta;' => "\xce\x98",
321
+ 'Uacute' => "\xc3\x9a",
322
+ 'Uacute;' => "\xc3\x9a",
323
+ 'Ucirc' => "\xc3\x9b",
324
+ 'Ucirc;' => "\xc3\x9b",
325
+ 'Ugrave' => "\xc3\x99",
326
+ 'Ugrave;' => "\xc3\x99",
327
+ 'Upsilon;' => "\xce\xa5",
328
+ 'Uuml' => "\xc3\x9c",
329
+ 'Uuml;' => "\xc3\x9c",
330
+ 'Xi;' => "\xce\x9e",
331
+ 'Yacute' => "\xc3\x9d",
332
+ 'Yacute;' => "\xc3\x9d",
333
+ 'Yuml;' => "\xc5\xb8",
334
+ 'Zeta;' => "\xce\x96",
335
+ 'aacute' => "\xc3\xa1",
336
+ 'aacute;' => "\xc3\xa1",
337
+ 'acirc' => "\xc3\xa2",
338
+ 'acirc;' => "\xc3\xa2",
339
+ 'acute' => "\xc2\xb4",
340
+ 'acute;' => "\xc2\xb4",
341
+ 'aelig' => "\xc3\xa6",
342
+ 'aelig;' => "\xc3\xa6",
343
+ 'agrave' => "\xc3\xa0",
344
+ 'agrave;' => "\xc3\xa0",
345
+ 'alefsym;' => "\xe2\x84\xb5",
346
+ 'alpha;' => "\xce\xb1",
347
+ 'amp' => '&',
348
+ 'amp;' => '&',
349
+ 'and;' => "\xe2\x88\xa7",
350
+ 'ang;' => "\xe2\x88\xa0",
351
+ 'apos;' => "'",
352
+ 'aring' => "\xc3\xa5",
353
+ 'aring;' => "\xc3\xa5",
354
+ 'asymp;' => "\xe2\x89\x88",
355
+ 'atilde' => "\xc3\xa3",
356
+ 'atilde;' => "\xc3\xa3",
357
+ 'auml' => "\xc3\xa4",
358
+ 'auml;' => "\xc3\xa4",
359
+ 'bdquo;' => "\xe2\x80\x9e",
360
+ 'beta;' => "\xce\xb2",
361
+ 'brvbar' => "\xc2\xa6",
362
+ 'brvbar;' => "\xc2\xa6",
363
+ 'bull;' => "\xe2\x80\xa2",
364
+ 'cap;' => "\xe2\x88\xa9",
365
+ 'ccedil' => "\xc3\xa7",
366
+ 'ccedil;' => "\xc3\xa7",
367
+ 'cedil' => "\xc2\xb8",
368
+ 'cedil;' => "\xc2\xb8",
369
+ 'cent' => "\xc2\xa2",
370
+ 'cent;' => "\xc2\xa2",
371
+ 'chi;' => "\xcf\x87",
372
+ 'circ;' => "\xcb\x86",
373
+ 'clubs;' => "\xe2\x99\xa3",
374
+ 'cong;' => "\xe2\x89\x85",
375
+ 'copy' => "\xc2\xa9",
376
+ 'copy;' => "\xc2\xa9",
377
+ 'crarr;' => "\xe2\x86\xb5",
378
+ 'cup;' => "\xe2\x88\xaa",
379
+ 'curren' => "\xc2\xa4",
380
+ 'curren;' => "\xc2\xa4",
381
+ 'dArr;' => "\xe2\x87\x93",
382
+ 'dagger;' => "\xe2\x80\xa0",
383
+ 'darr;' => "\xe2\x86\x93",
384
+ 'deg' => "\xc2\xb0",
385
+ 'deg;' => "\xc2\xb0",
386
+ 'delta;' => "\xce\xb4",
387
+ 'diams;' => "\xe2\x99\xa6",
388
+ 'divide' => "\xc3\xb7",
389
+ 'divide;' => "\xc3\xb7",
390
+ 'eacute' => "\xc3\xa9",
391
+ 'eacute;' => "\xc3\xa9",
392
+ 'ecirc' => "\xc3\xaa",
393
+ 'ecirc;' => "\xc3\xaa",
394
+ 'egrave' => "\xc3\xa8",
395
+ 'egrave;' => "\xc3\xa8",
396
+ 'empty;' => "\xe2\x88\x85",
397
+ 'emsp;' => "\xe2\x80\x83",
398
+ 'ensp;' => "\xe2\x80\x82",
399
+ 'epsilon;' => "\xce\xb5",
400
+ 'equiv;' => "\xe2\x89\xa1",
401
+ 'eta;' => "\xce\xb7",
402
+ 'eth' => "\xc3\xb0",
403
+ 'eth;' => "\xc3\xb0",
404
+ 'euml' => "\xc3\xab",
405
+ 'euml;' => "\xc3\xab",
406
+ 'euro;' => "\xe2\x82\xac",
407
+ 'exist;' => "\xe2\x88\x83",
408
+ 'fnof;' => "\xc6\x92",
409
+ 'forall;' => "\xe2\x88\x80",
410
+ 'frac12' => "\xc2\xbd",
411
+ 'frac12;' => "\xc2\xbd",
412
+ 'frac14' => "\xc2\xbc",
413
+ 'frac14;' => "\xc2\xbc",
414
+ 'frac34' => "\xc2\xbe",
415
+ 'frac34;' => "\xc2\xbe",
416
+ 'frasl;' => "\xe2\x81\x84",
417
+ 'gamma;' => "\xce\xb3",
418
+ 'ge;' => "\xe2\x89\xa5",
419
+ 'gt' => '>',
420
+ 'gt;' => '>',
421
+ 'hArr;' => "\xe2\x87\x94",
422
+ 'harr;' => "\xe2\x86\x94",
423
+ 'hearts;' => "\xe2\x99\xa5",
424
+ 'hellip;' => "\xe2\x80\xa6",
425
+ 'iacute' => "\xc3\xad",
426
+ 'iacute;' => "\xc3\xad",
427
+ 'icirc' => "\xc3\xae",
428
+ 'icirc;' => "\xc3\xae",
429
+ 'iexcl' => "\xc2\xa1",
430
+ 'iexcl;' => "\xc2\xa1",
431
+ 'igrave' => "\xc3\xac",
432
+ 'igrave;' => "\xc3\xac",
433
+ 'image;' => "\xe2\x84\x91",
434
+ 'infin;' => "\xe2\x88\x9e",
435
+ 'int;' => "\xe2\x88\xab",
436
+ 'iota;' => "\xce\xb9",
437
+ 'iquest' => "\xc2\xbf",
438
+ 'iquest;' => "\xc2\xbf",
439
+ 'isin;' => "\xe2\x88\x88",
440
+ 'iuml' => "\xc3\xaf",
441
+ 'iuml;' => "\xc3\xaf",
442
+ 'kappa;' => "\xce\xba",
443
+ 'lArr;' => "\xe2\x87\x90",
444
+ 'lambda;' => "\xce\xbb",
445
+ 'lang;' => "\xe3\x80\x88",
446
+ 'laquo' => "\xc2\xab",
447
+ 'laquo;' => "\xc2\xab",
448
+ 'larr;' => "\xe2\x86\x90",
449
+ 'lceil;' => "\xe2\x8c\x88",
450
+ 'ldquo;' => "\xe2\x80\x9c",
451
+ 'le;' => "\xe2\x89\xa4",
452
+ 'lfloor;' => "\xe2\x8c\x8a",
453
+ 'lowast;' => "\xe2\x88\x97",
454
+ 'loz;' => "\xe2\x97\x8a",
455
+ 'lrm;' => "\xe2\x80\x8e",
456
+ 'lsaquo;' => "\xe2\x80\xb9",
457
+ 'lsquo;' => "\xe2\x80\x98",
458
+ 'lt' => '<',
459
+ 'lt;' => '<',
460
+ 'macr' => "\xc2\xaf",
461
+ 'macr;' => "\xc2\xaf",
462
+ 'mdash;' => "\xe2\x80\x94",
463
+ 'micro' => "\xc2\xb5",
464
+ 'micro;' => "\xc2\xb5",
465
+ 'middot' => "\xc2\xb7",
466
+ 'middot;' => "\xc2\xb7",
467
+ 'minus;' => "\xe2\x88\x92",
468
+ 'mu;' => "\xce\xbc",
469
+ 'nabla;' => "\xe2\x88\x87",
470
+ 'nbsp' => "\xc2\xa0",
471
+ 'nbsp;' => "\xc2\xa0",
472
+ 'ndash;' => "\xe2\x80\x93",
473
+ 'ne;' => "\xe2\x89\xa0",
474
+ 'ni;' => "\xe2\x88\x8b",
475
+ 'not' => "\xc2\xac",
476
+ 'not;' => "\xc2\xac",
477
+ 'notin;' => "\xe2\x88\x89",
478
+ 'nsub;' => "\xe2\x8a\x84",
479
+ 'ntilde' => "\xc3\xb1",
480
+ 'ntilde;' => "\xc3\xb1",
481
+ 'nu;' => "\xce\xbd",
482
+ 'oacute' => "\xc3\xb3",
483
+ 'oacute;' => "\xc3\xb3",
484
+ 'ocirc' => "\xc3\xb4",
485
+ 'ocirc;' => "\xc3\xb4",
486
+ 'oelig;' => "\xc5\x93",
487
+ 'ograve' => "\xc3\xb2",
488
+ 'ograve;' => "\xc3\xb2",
489
+ 'oline;' => "\xe2\x80\xbe",
490
+ 'omega;' => "\xcf\x89",
491
+ 'omicron;' => "\xce\xbf",
492
+ 'oplus;' => "\xe2\x8a\x95",
493
+ 'or;' => "\xe2\x88\xa8",
494
+ 'ordf' => "\xc2\xaa",
495
+ 'ordf;' => "\xc2\xaa",
496
+ 'ordm' => "\xc2\xba",
497
+ 'ordm;' => "\xc2\xba",
498
+ 'oslash' => "\xc3\xb8",
499
+ 'oslash;' => "\xc3\xb8",
500
+ 'otilde' => "\xc3\xb5",
501
+ 'otilde;' => "\xc3\xb5",
502
+ 'otimes;' => "\xe2\x8a\x97",
503
+ 'ouml' => "\xc3\xb6",
504
+ 'ouml;' => "\xc3\xb6",
505
+ 'para' => "\xc2\xb6",
506
+ 'para;' => "\xc2\xb6",
507
+ 'part;' => "\xe2\x88\x82",
508
+ 'permil;' => "\xe2\x80\xb0",
509
+ 'perp;' => "\xe2\x8a\xa5",
510
+ 'phi;' => "\xcf\x86",
511
+ 'pi;' => "\xcf\x80",
512
+ 'piv;' => "\xcf\x96",
513
+ 'plusmn' => "\xc2\xb1",
514
+ 'plusmn;' => "\xc2\xb1",
515
+ 'pound' => "\xc2\xa3",
516
+ 'pound;' => "\xc2\xa3",
517
+ 'prime;' => "\xe2\x80\xb2",
518
+ 'prod;' => "\xe2\x88\x8f",
519
+ 'prop;' => "\xe2\x88\x9d",
520
+ 'psi;' => "\xcf\x88",
521
+ 'quot' => '"',
522
+ 'quot;' => '"',
523
+ 'rArr;' => "\xe2\x87\x92",
524
+ 'radic;' => "\xe2\x88\x9a",
525
+ 'rang;' => "\xe3\x80\x89",
526
+ 'raquo' => "\xc2\xbb",
527
+ 'raquo;' => "\xc2\xbb",
528
+ 'rarr;' => "\xe2\x86\x92",
529
+ 'rceil;' => "\xe2\x8c\x89",
530
+ 'rdquo;' => "\xe2\x80\x9d",
531
+ 'real;' => "\xe2\x84\x9c",
532
+ 'reg' => "\xc2\xae",
533
+ 'reg;' => "\xc2\xae",
534
+ 'rfloor;' => "\xe2\x8c\x8b",
535
+ 'rho;' => "\xcf\x81",
536
+ 'rlm;' => "\xe2\x80\x8f",
537
+ 'rsaquo;' => "\xe2\x80\xba",
538
+ 'rsquo;' => "\xe2\x80\x99",
539
+ 'sbquo;' => "\xe2\x80\x9a",
540
+ 'scaron;' => "\xc5\xa1",
541
+ 'sdot;' => "\xe2\x8b\x85",
542
+ 'sect' => "\xc2\xa7",
543
+ 'sect;' => "\xc2\xa7",
544
+ 'shy' => "\xc2\xad",
545
+ 'shy;' => "\xc2\xad",
546
+ 'sigma;' => "\xcf\x83",
547
+ 'sigmaf;' => "\xcf\x82",
548
+ 'sim;' => "\xe2\x88\xbc",
549
+ 'spades;' => "\xe2\x99\xa0",
550
+ 'sub;' => "\xe2\x8a\x82",
551
+ 'sube;' => "\xe2\x8a\x86",
552
+ 'sum;' => "\xe2\x88\x91",
553
+ 'sup1' => "\xc2\xb9",
554
+ 'sup1;' => "\xc2\xb9",
555
+ 'sup2' => "\xc2\xb2",
556
+ 'sup2;' => "\xc2\xb2",
557
+ 'sup3' => "\xc2\xb3",
558
+ 'sup3;' => "\xc2\xb3",
559
+ 'sup;' => "\xe2\x8a\x83",
560
+ 'supe;' => "\xe2\x8a\x87",
561
+ 'szlig' => "\xc3\x9f",
562
+ 'szlig;' => "\xc3\x9f",
563
+ 'tau;' => "\xcf\x84",
564
+ 'there4;' => "\xe2\x88\xb4",
565
+ 'theta;' => "\xce\xb8",
566
+ 'thetasym;' => "\xcf\x91",
567
+ 'thinsp;' => "\xe2\x80\x89",
568
+ 'thorn' => "\xc3\xbe",
569
+ 'thorn;' => "\xc3\xbe",
570
+ 'tilde;' => "\xcb\x9c",
571
+ 'times' => "\xc3\x97",
572
+ 'times;' => "\xc3\x97",
573
+ 'trade;' => "\xe2\x84\xa2",
574
+ 'uArr;' => "\xe2\x87\x91",
575
+ 'uacute' => "\xc3\xba",
576
+ 'uacute;' => "\xc3\xba",
577
+ 'uarr;' => "\xe2\x86\x91",
578
+ 'ucirc' => "\xc3\xbb",
579
+ 'ucirc;' => "\xc3\xbb",
580
+ 'ugrave' => "\xc3\xb9",
581
+ 'ugrave;' => "\xc3\xb9",
582
+ 'uml' => "\xc2\xa8",
583
+ 'uml;' => "\xc2\xa8",
584
+ 'upsih;' => "\xcf\x92",
585
+ 'upsilon;' => "\xcf\x85",
586
+ 'uuml' => "\xc3\xbc",
587
+ 'uuml;' => "\xc3\xbc",
588
+ 'weierp;' => "\xe2\x84\x98",
589
+ 'xi;' => "\xce\xbe",
590
+ 'yacute' => "\xc3\xbd",
591
+ 'yacute;' => "\xc3\xbd",
592
+ 'yen' => "\xc2\xa5",
593
+ 'yen;' => "\xc2\xa5",
594
+ 'yuml' => "\xc3\xbf",
595
+ 'yuml;' => "\xc3\xbf",
596
+ 'zeta;' => "\xce\xb6",
597
+ 'zwj;' => "\xe2\x80\x8d",
598
+ 'zwnj;' => "\xe2\x80\x8c"
599
+ }
600
+
601
+ ENCODINGS = %w[
602
+ ansi_x3.4-1968
603
+ iso-ir-6
604
+ ansi_x3.4-1986
605
+ iso_646.irv:1991
606
+ ascii
607
+ iso646-us
608
+ us-ascii
609
+ us
610
+ ibm367
611
+ cp367
612
+ csascii
613
+ ks_c_5601-1987
614
+ korean
615
+ iso-2022-kr
616
+ csiso2022kr
617
+ euc-kr
618
+ iso-2022-jp
619
+ csiso2022jp
620
+ iso-2022-jp-2
621
+ iso-ir-58
622
+ chinese
623
+ csiso58gb231280
624
+ iso_8859-1:1987
625
+ iso-ir-100
626
+ iso_8859-1
627
+ iso-8859-1
628
+ latin1
629
+ l1
630
+ ibm819
631
+ cp819
632
+ csisolatin1
633
+ iso_8859-2:1987
634
+ iso-ir-101
635
+ iso_8859-2
636
+ iso-8859-2
637
+ latin2
638
+ l2
639
+ csisolatin2
640
+ iso_8859-3:1988
641
+ iso-ir-109
642
+ iso_8859-3
643
+ iso-8859-3
644
+ latin3
645
+ l3
646
+ csisolatin3
647
+ iso_8859-4:1988
648
+ iso-ir-110
649
+ iso_8859-4
650
+ iso-8859-4
651
+ latin4
652
+ l4
653
+ csisolatin4
654
+ iso_8859-6:1987
655
+ iso-ir-127
656
+ iso_8859-6
657
+ iso-8859-6
658
+ ecma-114
659
+ asmo-708
660
+ arabic
661
+ csisolatinarabic
662
+ iso_8859-7:1987
663
+ iso-ir-126
664
+ iso_8859-7
665
+ iso-8859-7
666
+ elot_928
667
+ ecma-118
668
+ greek
669
+ greek8
670
+ csisolatingreek
671
+ iso_8859-8:1988
672
+ iso-ir-138
673
+ iso_8859-8
674
+ iso-8859-8
675
+ hebrew
676
+ csisolatinhebrew
677
+ iso_8859-5:1988
678
+ iso-ir-144
679
+ iso_8859-5
680
+ iso-8859-5
681
+ cyrillic
682
+ csisolatincyrillic
683
+ iso_8859-9:1989
684
+ iso-ir-148
685
+ iso_8859-9
686
+ iso-8859-9
687
+ latin5
688
+ l5
689
+ csisolatin5
690
+ iso-8859-10
691
+ iso-ir-157
692
+ l6
693
+ iso_8859-10:1992
694
+ csisolatin6
695
+ latin6
696
+ hp-roman8
697
+ roman8
698
+ r8
699
+ ibm037
700
+ cp037
701
+ csibm037
702
+ ibm424
703
+ cp424
704
+ csibm424
705
+ ibm437
706
+ cp437
707
+ 437
708
+ cspc8codepage437
709
+ ibm500
710
+ cp500
711
+ csibm500
712
+ ibm775
713
+ cp775
714
+ cspc775baltic
715
+ ibm850
716
+ cp850
717
+ 850
718
+ cspc850multilingual
719
+ ibm852
720
+ cp852
721
+ 852
722
+ cspcp852
723
+ ibm855
724
+ cp855
725
+ 855
726
+ csibm855
727
+ ibm857
728
+ cp857
729
+ 857
730
+ csibm857
731
+ ibm860
732
+ cp860
733
+ 860
734
+ csibm860
735
+ ibm861
736
+ cp861
737
+ 861
738
+ cp-is
739
+ csibm861
740
+ ibm862
741
+ cp862
742
+ 862
743
+ cspc862latinhebrew
744
+ ibm863
745
+ cp863
746
+ 863
747
+ csibm863
748
+ ibm864
749
+ cp864
750
+ csibm864
751
+ ibm865
752
+ cp865
753
+ 865
754
+ csibm865
755
+ ibm866
756
+ cp866
757
+ 866
758
+ csibm866
759
+ ibm869
760
+ cp869
761
+ 869
762
+ cp-gr
763
+ csibm869
764
+ ibm1026
765
+ cp1026
766
+ csibm1026
767
+ koi8-r
768
+ cskoi8r
769
+ koi8-u
770
+ big5-hkscs
771
+ ptcp154
772
+ csptcp154
773
+ pt154
774
+ cp154
775
+ utf-7
776
+ utf-16be
777
+ utf-16le
778
+ utf-16
779
+ utf-8
780
+ iso-8859-13
781
+ iso-8859-14
782
+ iso-ir-199
783
+ iso_8859-14:1998
784
+ iso_8859-14
785
+ latin8
786
+ iso-celtic
787
+ l8
788
+ iso-8859-15
789
+ iso_8859-15
790
+ iso-8859-16
791
+ iso-ir-226
792
+ iso_8859-16:2001
793
+ iso_8859-16
794
+ latin10
795
+ l10
796
+ gbk
797
+ cp936
798
+ ms936
799
+ gb18030
800
+ shift_jis
801
+ ms_kanji
802
+ csshiftjis
803
+ euc-jp
804
+ gb2312
805
+ big5
806
+ csbig5
807
+ windows-1250
808
+ windows-1251
809
+ windows-1252
810
+ windows-1253
811
+ windows-1254
812
+ windows-1255
813
+ windows-1256
814
+ windows-1257
815
+ windows-1258
816
+ tis-620
817
+ hz-gb-2312
818
+ ]
819
+
820
# Parse-error message table.
#
# Maps each parse-error code (a dash-separated String key) to the
# human-readable message template reported for that error.
#
# * Every message is passed through `_`, which is defined outside this
#   block. NOTE(review): presumably a gettext-style translation hook;
#   confirm against its definition.
# * Placeholders are written in the Python-style `%(key)` form (e.g.
#   `%(name)`, `%(charAsInt)08x`). NOTE(review): this is not Ruby's
#   `%{key}` / `%<key>s` syntax, so callers presumably expand it with
#   their own formatter; confirm before changing the placeholder style.
E = {
  # --- Input stream / tokenizer: characters and entities ---
  "null-character" =>
    _("Null character in input stream, replaced with U+FFFD."),
  "incorrectly-placed-solidus" =>
    _("Solidus (/) incorrectly placed in tag."),
  "incorrect-cr-newline-entity" =>
    _("Incorrect CR newline entity, replaced with LF."),
  "illegal-windows-1252-entity" =>
    _("Entity used with illegal number (windows-1252 reference)."),
  "cant-convert-numeric-entity" =>
    _("Numeric entity couldn't be converted to character " +
      "(codepoint U+%(charAsInt)08x)."),
  # The message previously read "codepoint=> "; the "=>" was not meant to
  # be part of the text and has been restored to a colon.
  "illegal-codepoint-for-numeric-entity" =>
    _("Numeric entity represents an illegal codepoint: " +
      "U+%(charAsInt)08x."),
  "numeric-entity-without-semicolon" =>
    _("Numeric entity didn't end with ';'."),
  "expected-numeric-entity-but-got-eof" =>
    _("Numeric entity expected. Got end of file instead."),
  "expected-numeric-entity" =>
    _("Numeric entity expected but none found."),
  "named-entity-without-semicolon" =>
    _("Named entity didn't end with ';'."),
  "expected-named-entity" =>
    _("Named entity expected. Got none."),

  # --- Tokenizer: tags and attributes ---
  "attributes-in-end-tag" =>
    _("End tag contains unexpected attributes."),
  "expected-tag-name-but-got-right-bracket" =>
    _("Expected tag name. Got '>' instead."),
  "expected-tag-name-but-got-question-mark" =>
    _("Expected tag name. Got '?' instead. (HTML doesn't " +
      "support processing instructions.)"),
  "expected-tag-name" =>
    _("Expected tag name. Got something else instead"),
  "expected-closing-tag-but-got-right-bracket" =>
    _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
  "expected-closing-tag-but-got-eof" =>
    _("Expected closing tag. Unexpected end of file."),
  "expected-closing-tag-but-got-char" =>
    _("Expected closing tag. Unexpected character '%(data)' found."),
  "eof-in-tag-name" =>
    _("Unexpected end of file in the tag name."),
  "expected-attribute-name-but-got-eof" =>
    _("Unexpected end of file. Expected attribute name instead."),
  "eof-in-attribute-name" =>
    _("Unexpected end of file in attribute name."),
  "duplicate-attribute" =>
    _("Dropped duplicate attribute on tag."),
  "expected-end-of-tag-name-but-got-eof" =>
    _("Unexpected end of file. Expected = or end of tag."),
  "expected-attribute-value-but-got-eof" =>
    _("Unexpected end of file. Expected attribute value."),
  "eof-in-attribute-value-double-quote" =>
    _("Unexpected end of file in attribute value (\")."),
  "eof-in-attribute-value-single-quote" =>
    _("Unexpected end of file in attribute value (')."),
  "eof-in-attribute-value-no-quotes" =>
    _("Unexpected end of file in attribute value."),

  # --- Tokenizer: comments and DOCTYPE ---
  "expected-dashes-or-doctype" =>
    _("Expected '--' or 'DOCTYPE'. Not found."),
  "incorrect-comment" =>
    _("Incorrect comment."),
  "eof-in-comment" =>
    _("Unexpected end of file in comment."),
  "eof-in-comment-end-dash" =>
    _("Unexpected end of file in comment (-)"),
  "unexpected-dash-after-double-dash-in-comment" =>
    _("Unexpected '-' after '--' found in comment."),
  "eof-in-comment-double-dash" =>
    _("Unexpected end of file in comment (--)."),
  "unexpected-char-in-comment" =>
    _("Unexpected character in comment found."),
  "need-space-after-doctype" =>
    _("No space after literal string 'DOCTYPE'."),
  "expected-doctype-name-but-got-right-bracket" =>
    _("Unexpected > character. Expected DOCTYPE name."),
  "expected-doctype-name-but-got-eof" =>
    _("Unexpected end of file. Expected DOCTYPE name."),
  "eof-in-doctype-name" =>
    _("Unexpected end of file in DOCTYPE name."),
  "eof-in-doctype" =>
    _("Unexpected end of file in DOCTYPE."),
  "expected-space-or-right-bracket-in-doctype" =>
    _("Expected space or '>'. Got '%(data)'"),
  "unexpected-end-of-doctype" =>
    _("Unexpected end of DOCTYPE."),
  "unexpected-char-in-doctype" =>
    _("Unexpected character in DOCTYPE."),
  "eof-in-bogus-doctype" =>
    _("Unexpected end of file in bogus doctype."),
  "eof-in-innerhtml" =>
    _("XXX innerHTML EOF"),

  # --- Tree construction: document start, head and body ---
  "unexpected-doctype" =>
    _("Unexpected DOCTYPE. Ignored."),
  "non-html-root" =>
    _("html needs to be the first start tag."),
  "expected-doctype-but-got-eof" =>
    _("Unexpected End of file. Expected DOCTYPE."),
  "unknown-doctype" =>
    _("Erroneous DOCTYPE."),
  "expected-doctype-but-got-chars" =>
    _("Unexpected non-space characters. Expected DOCTYPE."),
  "expected-doctype-but-got-start-tag" =>
    _("Unexpected start tag (%(name)). Expected DOCTYPE."),
  "expected-doctype-but-got-end-tag" =>
    _("Unexpected end tag (%(name)). Expected DOCTYPE."),
  "end-tag-after-implied-root" =>
    _("Unexpected end tag (%(name)) after the (implied) root element."),
  "expected-named-closing-tag-but-got-eof" =>
    _("Unexpected end of file. Expected end tag (%(name))."),
  "two-heads-are-not-better-than-one" =>
    _("Unexpected start tag head in existing head. Ignored."),
  "unexpected-end-tag" =>
    _("Unexpected end tag (%(name)). Ignored."),
  "unexpected-start-tag-out-of-my-head" =>
    _("Unexpected start tag (%(name)) that can be in head. Moved."),
  "unexpected-start-tag" =>
    _("Unexpected start tag (%(name))."),
  "missing-end-tag" =>
    _("Missing end tag (%(name))."),
  "missing-end-tags" =>
    _("Missing end tags (%(name))."),
  "unexpected-start-tag-implies-end-tag" =>
    _("Unexpected start tag (%(startName)) " +
      "implies end tag (%(endName))."),
  "unexpected-start-tag-treated-as" =>
    _("Unexpected start tag (%(originalName)). Treated as %(newName)."),
  "deprecated-tag" =>
    _("Unexpected start tag %(name). Don't use it!"),
  "unexpected-start-tag-ignored" =>
    _("Unexpected start tag %(name). Ignored."),
  "expected-one-end-tag-but-got-another" =>
    _("Unexpected end tag (%(gotName)). " +
      "Missing end tag (%(expectedName))."),
  "end-tag-too-early" =>
    _("End tag (%(name)) seen too early. Expected other end tag."),
  "end-tag-too-early-named" =>
    _("Unexpected end tag (%(gotName)). Expected end tag (%(expectedName))."),
  "end-tag-too-early-ignored" =>
    _("End tag (%(name)) seen too early. Ignored."),
  "adoption-agency-1.1" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 1 of the adoption agency algorithm."),
  "adoption-agency-1.2" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 2 of the adoption agency algorithm."),
  "adoption-agency-1.3" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 3 of the adoption agency algorithm."),
  "unexpected-end-tag-treated-as" =>
    _("Unexpected end tag (%(originalName)). Treated as %(newName)."),
  "no-end-tag" =>
    _("This element (%(name)) has no end tag."),

  # --- Tree construction: tables ---
  "unexpected-implied-end-tag-in-table" =>
    _("Unexpected implied end tag (%(name)) in the table phase."),
  "unexpected-implied-end-tag-in-table-body" =>
    _("Unexpected implied end tag (%(name)) in the table body phase."),
  "unexpected-char-implies-table-voodoo" =>
    _("Unexpected non-space characters in " +
      "table context caused voodoo mode."),
  "unexpected-start-tag-implies-table-voodoo" =>
    _("Unexpected start tag (%(name)) in " +
      "table context caused voodoo mode."),
  "unexpected-end-tag-implies-table-voodoo" =>
    _("Unexpected end tag (%(name)) in " +
      "table context caused voodoo mode."),
  "unexpected-cell-in-table-body" =>
    _("Unexpected table cell start tag (%(name)) " +
      "in the table body phase."),
  "unexpected-cell-end-tag" =>
    _("Got table cell end tag (%(name)) " +
      "while required end tags are missing."),
  "unexpected-end-tag-in-table-body" =>
    _("Unexpected end tag (%(name)) in the table body phase. Ignored."),
  "unexpected-implied-end-tag-in-table-row" =>
    _("Unexpected implied end tag (%(name)) in the table row phase."),
  "unexpected-end-tag-in-table-row" =>
    _("Unexpected end tag (%(name)) in the table row phase. Ignored."),

  # --- Tree construction: select ---
  "unexpected-select-in-select" =>
    _("Unexpected select start tag in the select phase " +
      "implies select start tag."),
  # The closing parenthesis after %(name) was missing; restored so the
  # message matches the "(%(name))" form used by its siblings.
  "unexpected-start-tag-in-select" =>
    _("Unexpected start tag token (%(name)) in the select phase. " +
      "Ignored."),
  "unexpected-end-tag-in-select" =>
    _("Unexpected end tag (%(name)) in the select phase. Ignored."),

  # --- Tree construction: after body ---
  "unexpected-char-after-body" =>
    _("Unexpected non-space characters in the after body phase."),
  "unexpected-start-tag-after-body" =>
    _("Unexpected start tag token (%(name))" +
      " in the after body phase."),
  "unexpected-end-tag-after-body" =>
    _("Unexpected end tag token (%(name))" +
      " in the after body phase."),

  # --- Tree construction: frameset ---
  # "Unepxected" typo corrected to "Unexpected".
  "unexpected-char-in-frameset" =>
    _("Unexpected characters in the frameset phase. Characters ignored."),
  "unexpected-start-tag-in-frameset" =>
    _("Unexpected start tag token (%(name))" +
      " in the frameset phase. Ignored."),
  "unexpected-frameset-in-frameset-innerhtml" =>
    _("Unexpected end tag token (frameset) " +
      "in the frameset phase (innerHTML)."),
  "unexpected-end-tag-in-frameset" =>
    _("Unexpected end tag token (%(name))" +
      " in the frameset phase. Ignored."),
  "unexpected-char-after-frameset" =>
    _("Unexpected non-space characters in the " +
      "after frameset phase. Ignored."),
  "unexpected-start-tag-after-frameset" =>
    _("Unexpected start tag (%(name))" +
      " in the after frameset phase. Ignored."),
  "unexpected-end-tag-after-frameset" =>
    _("Unexpected end tag (%(name))" +
      " in the after frameset phase. Ignored."),

  # --- Tree construction: trailing content ---
  "expected-eof-but-got-char" =>
    _("Unexpected non-space characters. Expected end of file."),
  "expected-eof-but-got-start-tag" =>
    _("Unexpected start tag (%(name))" +
      ". Expected end of file."),
  "expected-eof-but-got-end-tag" =>
    _("Unexpected end tag (%(name))" +
      ". Expected end of file."),
  "unexpected-end-table-in-caption" =>
    _("Unexpected end table tag in caption. Generates implied end caption.")
}
1045
+
1046
+ end