feedtools 0.2.26 → 0.2.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,17 @@
1
# Extends String#% so that a Hash argument performs Python-style named
# substitution: every literal "%(key)" in the receiver is replaced by the
# corresponding hash value. Any other argument is passed to the built-in
# String#% unchanged.
class String
  # Keep a handle on the built-in implementation. The guard prevents a second
  # load of this file from aliasing the already-patched method onto itself,
  # which would make the non-Hash branch recurse without end.
  unless method_defined?(:old_format)
    alias_method :old_format, :%
  end

  # data - either a Hash of name => replacement, or any argument accepted by
  #        the built-in String#% (a single value or an Array of values).
  #
  # Returns a new String; the receiver itself is never modified.
  # Raises whatever the built-in String#% raises for non-Hash arguments.
  define_method("%") do |data|
    if data.kind_of?(Hash)
      # dup (not clone) so that a frozen receiver still yields a mutable copy.
      result = dup
      data.each do |key, value|
        # Escape the key so regex metacharacters in it are matched literally.
        # Only the "%(key)" text is replaced; anything following it (such as
        # width or conversion characters) is left in place.
        pattern = /%\(#{Regexp.escape(key.to_s)}\)/
        # Block form inserts the replacement verbatim (no "\1"-style
        # interpretation); to_s lets non-String values such as Integers through.
        result.gsub!(pattern) { value.to_s }
      end
      result
    else
      # Silence warnings only for the duration of the built-in call, then put
      # $VERBOSE back to whatever the caller had, even when formatting raises.
      saved_verbose = $VERBOSE
      begin
        $VERBOSE = false
        old_format(data)
      ensure
        $VERBOSE = saved_verbose
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'html5/html5parser'
2
+ require 'html5/version'
3
+
4
# Module-level convenience entry points that forward to HTMLParser.
module HTML5

  # Parses +stream+ by delegating to HTMLParser.parse.
  #
  # stream  - the input handed to the parser unchanged.
  # options - Hash passed through to HTMLParser.parse (defaults to {}).
  #
  # Returns whatever HTMLParser.parse returns.
  def self.parse(stream, options={})
    HTMLParser.parse(stream, options)
  end

  # Parses +stream+ as a fragment by delegating to HTMLParser.parse_fragment.
  # Previously this forwarded to HTMLParser.parse, which made it
  # indistinguishable from HTML5.parse.
  #
  # NOTE(review): assumes HTMLParser responds to parse_fragment with the same
  # (stream, options) arguments; it is defined in html5/html5parser, which is
  # not visible from this file - confirm.
  #
  # stream  - the input handed to the parser unchanged.
  # options - Hash passed through to HTMLParser.parse_fragment (defaults to {}).
  #
  # Returns whatever HTMLParser.parse_fragment returns.
  def self.parse_fragment(stream, options={})
    HTMLParser.parse_fragment(stream, options)
  end
end
@@ -0,0 +1,1046 @@
1
+ module HTML5
2
+
3
+ class EOF < Exception; end
4
+
5
+ def self._(str); str end
6
+
7
+ CONTENT_MODEL_FLAGS = [
8
+ :PCDATA,
9
+ :RCDATA,
10
+ :CDATA,
11
+ :PLAINTEXT
12
+ ]
13
+
14
+ SCOPING_ELEMENTS = %w[
15
+ button
16
+ caption
17
+ html
18
+ marquee
19
+ object
20
+ table
21
+ td
22
+ th
23
+ ]
24
+
25
+ FORMATTING_ELEMENTS = %w[
26
+ a
27
+ b
28
+ big
29
+ em
30
+ font
31
+ i
32
+ nobr
33
+ s
34
+ small
35
+ strike
36
+ strong
37
+ tt
38
+ u
39
+ ]
40
+
41
+ SPECIAL_ELEMENTS = %w[
42
+ address
43
+ area
44
+ base
45
+ basefont
46
+ bgsound
47
+ blockquote
48
+ body
49
+ br
50
+ center
51
+ col
52
+ colgroup
53
+ dd
54
+ dir
55
+ div
56
+ dl
57
+ dt
58
+ embed
59
+ fieldset
60
+ form
61
+ frame
62
+ frameset
63
+ h1
64
+ h2
65
+ h3
66
+ h4
67
+ h5
68
+ h6
69
+ head
70
+ hr
71
+ iframe
72
+ image
73
+ img
74
+ input
75
+ isindex
76
+ li
77
+ link
78
+ listing
79
+ menu
80
+ meta
81
+ noembed
82
+ noframes
83
+ noscript
84
+ ol
85
+ optgroup
86
+ option
87
+ p
88
+ param
89
+ plaintext
90
+ pre
91
+ script
92
+ select
93
+ spacer
94
+ style
95
+ tbody
96
+ textarea
97
+ tfoot
98
+ thead
99
+ title
100
+ tr
101
+ ul
102
+ wbr
103
+ ]
104
+
105
+ SPACE_CHARACTERS = %W[
106
+ \t
107
+ \n
108
+ \x0B
109
+ \x0C
110
+ \x20
111
+ \r
112
+ ]
113
+
114
+ TABLE_INSERT_MODE_ELEMENTS = %w[
115
+ table
116
+ tbody
117
+ tfoot
118
+ thead
119
+ tr
120
+ ]
121
+
122
+ ASCII_LOWERCASE = ('a'..'z').to_a.join('')
123
+ ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
124
+ ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
125
+ DIGITS = '0'..'9'
126
+ HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
127
+
128
+ # Heading elements need to be ordered
129
+ HEADING_ELEMENTS = %w[
130
+ h1
131
+ h2
132
+ h3
133
+ h4
134
+ h5
135
+ h6
136
+ ]
137
+
138
+ # XXX What about event-source and command?
139
+ VOID_ELEMENTS = %w[
140
+ base
141
+ link
142
+ meta
143
+ hr
144
+ br
145
+ img
146
+ embed
147
+ param
148
+ area
149
+ col
150
+ input
151
+ ]
152
+
153
+ CDATA_ELEMENTS = %w[title textarea]
154
+
155
+ RCDATA_ELEMENTS = %w[
156
+ style
157
+ script
158
+ xmp
159
+ iframe
160
+ noembed
161
+ noframes
162
+ noscript
163
+ ]
164
+
165
+ BOOLEAN_ATTRIBUTES = {
166
+ :global => %w[irrelevant],
167
+ 'style' => %w[scoped],
168
+ 'img' => %w[ismap],
169
+ 'audio' => %w[autoplay controls],
170
+ 'video' => %w[autoplay controls],
171
+ 'script' => %w[defer async],
172
+ 'details' => %w[open],
173
+ 'datagrid' => %w[multiple disabled],
174
+ 'command' => %w[hidden disabled checked default],
175
+ 'menu' => %w[autosubmit],
176
+ 'fieldset' => %w[disabled readonly],
177
+ 'option' => %w[disabled readonly selected],
178
+ 'optgroup' => %w[disabled readonly],
179
+ 'button' => %w[disabled autofocus],
180
+ 'input' => %w[disabled readonly required autofocus checked ismap],
181
+ 'select' => %w[disabled readonly autofocus multiple],
182
+ 'output' => %w[disabled readonly]
183
+
184
+ }
185
+
186
+ # entitiesWindows1252 has to be _ordered_ and needs to have an index.
187
+ ENTITIES_WINDOWS1252 = [
188
+ 8364, # 0x80 0x20AC EURO SIGN
189
+ 65533, # 0x81 UNDEFINED
190
+ 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
191
+ 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
192
+ 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
193
+ 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
194
+ 8224, # 0x86 0x2020 DAGGER
195
+ 8225, # 0x87 0x2021 DOUBLE DAGGER
196
+ 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
197
+ 8240, # 0x89 0x2030 PER MILLE SIGN
198
+ 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
199
+ 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
200
+ 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
201
+ 65533, # 0x8D UNDEFINED
202
+ 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
203
+ 65533, # 0x8F UNDEFINED
204
+ 65533, # 0x90 UNDEFINED
205
+ 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
206
+ 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
207
+ 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
208
+ 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
209
+ 8226, # 0x95 0x2022 BULLET
210
+ 8211, # 0x96 0x2013 EN DASH
211
+ 8212, # 0x97 0x2014 EM DASH
212
+ 732, # 0x98 0x02DC SMALL TILDE
213
+ 8482, # 0x99 0x2122 TRADE MARK SIGN
214
+ 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
215
+ 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
216
+ 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
217
+ 65533, # 0x9D UNDEFINED
218
+ 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
219
+ 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
220
+ ]
221
+
222
+ # ENTITIES was generated from Python using the following code:
223
+ #
224
+ # import constants
225
+ # entities = constants.entities.items()
226
+ # entities.sort()
227
+ # list = [ ' '.join([repr(entity), '=>', ord(value)<128 and
228
+ # repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
229
+ # for entity, value in entities]
230
+ # print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
231
+
232
+ ENTITIES = {
233
+ 'AElig' => "\xc3\x86",
234
+ 'AElig;' => "\xc3\x86",
235
+ 'AMP' => '&',
236
+ 'AMP;' => '&',
237
+ 'Aacute' => "\xc3\x81",
238
+ 'Aacute;' => "\xc3\x81",
239
+ 'Acirc' => "\xc3\x82",
240
+ 'Acirc;' => "\xc3\x82",
241
+ 'Agrave' => "\xc3\x80",
242
+ 'Agrave;' => "\xc3\x80",
243
+ 'Alpha;' => "\xce\x91",
244
+ 'Aring' => "\xc3\x85",
245
+ 'Aring;' => "\xc3\x85",
246
+ 'Atilde' => "\xc3\x83",
247
+ 'Atilde;' => "\xc3\x83",
248
+ 'Auml' => "\xc3\x84",
249
+ 'Auml;' => "\xc3\x84",
250
+ 'Beta;' => "\xce\x92",
251
+ 'COPY' => "\xc2\xa9",
252
+ 'COPY;' => "\xc2\xa9",
253
+ 'Ccedil' => "\xc3\x87",
254
+ 'Ccedil;' => "\xc3\x87",
255
+ 'Chi;' => "\xce\xa7",
256
+ 'Dagger;' => "\xe2\x80\xa1",
257
+ 'Delta;' => "\xce\x94",
258
+ 'ETH' => "\xc3\x90",
259
+ 'ETH;' => "\xc3\x90",
260
+ 'Eacute' => "\xc3\x89",
261
+ 'Eacute;' => "\xc3\x89",
262
+ 'Ecirc' => "\xc3\x8a",
263
+ 'Ecirc;' => "\xc3\x8a",
264
+ 'Egrave' => "\xc3\x88",
265
+ 'Egrave;' => "\xc3\x88",
266
+ 'Epsilon;' => "\xce\x95",
267
+ 'Eta;' => "\xce\x97",
268
+ 'Euml' => "\xc3\x8b",
269
+ 'Euml;' => "\xc3\x8b",
270
+ 'GT' => '>',
271
+ 'GT;' => '>',
272
+ 'Gamma;' => "\xce\x93",
273
+ 'Iacute' => "\xc3\x8d",
274
+ 'Iacute;' => "\xc3\x8d",
275
+ 'Icirc' => "\xc3\x8e",
276
+ 'Icirc;' => "\xc3\x8e",
277
+ 'Igrave' => "\xc3\x8c",
278
+ 'Igrave;' => "\xc3\x8c",
279
+ 'Iota;' => "\xce\x99",
280
+ 'Iuml' => "\xc3\x8f",
281
+ 'Iuml;' => "\xc3\x8f",
282
+ 'Kappa;' => "\xce\x9a",
283
+ 'LT' => '<',
284
+ 'LT;' => '<',
285
+ 'Lambda;' => "\xce\x9b",
286
+ 'Mu;' => "\xce\x9c",
287
+ 'Ntilde' => "\xc3\x91",
288
+ 'Ntilde;' => "\xc3\x91",
289
+ 'Nu;' => "\xce\x9d",
290
+ 'OElig;' => "\xc5\x92",
291
+ 'Oacute' => "\xc3\x93",
292
+ 'Oacute;' => "\xc3\x93",
293
+ 'Ocirc' => "\xc3\x94",
294
+ 'Ocirc;' => "\xc3\x94",
295
+ 'Ograve' => "\xc3\x92",
296
+ 'Ograve;' => "\xc3\x92",
297
+ 'Omega;' => "\xce\xa9",
298
+ 'Omicron;' => "\xce\x9f",
299
+ 'Oslash' => "\xc3\x98",
300
+ 'Oslash;' => "\xc3\x98",
301
+ 'Otilde' => "\xc3\x95",
302
+ 'Otilde;' => "\xc3\x95",
303
+ 'Ouml' => "\xc3\x96",
304
+ 'Ouml;' => "\xc3\x96",
305
+ 'Phi;' => "\xce\xa6",
306
+ 'Pi;' => "\xce\xa0",
307
+ 'Prime;' => "\xe2\x80\xb3",
308
+ 'Psi;' => "\xce\xa8",
309
+ 'QUOT' => '"',
310
+ 'QUOT;' => '"',
311
+ 'REG' => "\xc2\xae",
312
+ 'REG;' => "\xc2\xae",
313
+ 'Rho;' => "\xce\xa1",
314
+ 'Scaron;' => "\xc5\xa0",
315
+ 'Sigma;' => "\xce\xa3",
316
+ 'THORN' => "\xc3\x9e",
317
+ 'THORN;' => "\xc3\x9e",
318
+ 'TRADE;' => "\xe2\x84\xa2",
319
+ 'Tau;' => "\xce\xa4",
320
+ 'Theta;' => "\xce\x98",
321
+ 'Uacute' => "\xc3\x9a",
322
+ 'Uacute;' => "\xc3\x9a",
323
+ 'Ucirc' => "\xc3\x9b",
324
+ 'Ucirc;' => "\xc3\x9b",
325
+ 'Ugrave' => "\xc3\x99",
326
+ 'Ugrave;' => "\xc3\x99",
327
+ 'Upsilon;' => "\xce\xa5",
328
+ 'Uuml' => "\xc3\x9c",
329
+ 'Uuml;' => "\xc3\x9c",
330
+ 'Xi;' => "\xce\x9e",
331
+ 'Yacute' => "\xc3\x9d",
332
+ 'Yacute;' => "\xc3\x9d",
333
+ 'Yuml;' => "\xc5\xb8",
334
+ 'Zeta;' => "\xce\x96",
335
+ 'aacute' => "\xc3\xa1",
336
+ 'aacute;' => "\xc3\xa1",
337
+ 'acirc' => "\xc3\xa2",
338
+ 'acirc;' => "\xc3\xa2",
339
+ 'acute' => "\xc2\xb4",
340
+ 'acute;' => "\xc2\xb4",
341
+ 'aelig' => "\xc3\xa6",
342
+ 'aelig;' => "\xc3\xa6",
343
+ 'agrave' => "\xc3\xa0",
344
+ 'agrave;' => "\xc3\xa0",
345
+ 'alefsym;' => "\xe2\x84\xb5",
346
+ 'alpha;' => "\xce\xb1",
347
+ 'amp' => '&',
348
+ 'amp;' => '&',
349
+ 'and;' => "\xe2\x88\xa7",
350
+ 'ang;' => "\xe2\x88\xa0",
351
+ 'apos;' => "'",
352
+ 'aring' => "\xc3\xa5",
353
+ 'aring;' => "\xc3\xa5",
354
+ 'asymp;' => "\xe2\x89\x88",
355
+ 'atilde' => "\xc3\xa3",
356
+ 'atilde;' => "\xc3\xa3",
357
+ 'auml' => "\xc3\xa4",
358
+ 'auml;' => "\xc3\xa4",
359
+ 'bdquo;' => "\xe2\x80\x9e",
360
+ 'beta;' => "\xce\xb2",
361
+ 'brvbar' => "\xc2\xa6",
362
+ 'brvbar;' => "\xc2\xa6",
363
+ 'bull;' => "\xe2\x80\xa2",
364
+ 'cap;' => "\xe2\x88\xa9",
365
+ 'ccedil' => "\xc3\xa7",
366
+ 'ccedil;' => "\xc3\xa7",
367
+ 'cedil' => "\xc2\xb8",
368
+ 'cedil;' => "\xc2\xb8",
369
+ 'cent' => "\xc2\xa2",
370
+ 'cent;' => "\xc2\xa2",
371
+ 'chi;' => "\xcf\x87",
372
+ 'circ;' => "\xcb\x86",
373
+ 'clubs;' => "\xe2\x99\xa3",
374
+ 'cong;' => "\xe2\x89\x85",
375
+ 'copy' => "\xc2\xa9",
376
+ 'copy;' => "\xc2\xa9",
377
+ 'crarr;' => "\xe2\x86\xb5",
378
+ 'cup;' => "\xe2\x88\xaa",
379
+ 'curren' => "\xc2\xa4",
380
+ 'curren;' => "\xc2\xa4",
381
+ 'dArr;' => "\xe2\x87\x93",
382
+ 'dagger;' => "\xe2\x80\xa0",
383
+ 'darr;' => "\xe2\x86\x93",
384
+ 'deg' => "\xc2\xb0",
385
+ 'deg;' => "\xc2\xb0",
386
+ 'delta;' => "\xce\xb4",
387
+ 'diams;' => "\xe2\x99\xa6",
388
+ 'divide' => "\xc3\xb7",
389
+ 'divide;' => "\xc3\xb7",
390
+ 'eacute' => "\xc3\xa9",
391
+ 'eacute;' => "\xc3\xa9",
392
+ 'ecirc' => "\xc3\xaa",
393
+ 'ecirc;' => "\xc3\xaa",
394
+ 'egrave' => "\xc3\xa8",
395
+ 'egrave;' => "\xc3\xa8",
396
+ 'empty;' => "\xe2\x88\x85",
397
+ 'emsp;' => "\xe2\x80\x83",
398
+ 'ensp;' => "\xe2\x80\x82",
399
+ 'epsilon;' => "\xce\xb5",
400
+ 'equiv;' => "\xe2\x89\xa1",
401
+ 'eta;' => "\xce\xb7",
402
+ 'eth' => "\xc3\xb0",
403
+ 'eth;' => "\xc3\xb0",
404
+ 'euml' => "\xc3\xab",
405
+ 'euml;' => "\xc3\xab",
406
+ 'euro;' => "\xe2\x82\xac",
407
+ 'exist;' => "\xe2\x88\x83",
408
+ 'fnof;' => "\xc6\x92",
409
+ 'forall;' => "\xe2\x88\x80",
410
+ 'frac12' => "\xc2\xbd",
411
+ 'frac12;' => "\xc2\xbd",
412
+ 'frac14' => "\xc2\xbc",
413
+ 'frac14;' => "\xc2\xbc",
414
+ 'frac34' => "\xc2\xbe",
415
+ 'frac34;' => "\xc2\xbe",
416
+ 'frasl;' => "\xe2\x81\x84",
417
+ 'gamma;' => "\xce\xb3",
418
+ 'ge;' => "\xe2\x89\xa5",
419
+ 'gt' => '>',
420
+ 'gt;' => '>',
421
+ 'hArr;' => "\xe2\x87\x94",
422
+ 'harr;' => "\xe2\x86\x94",
423
+ 'hearts;' => "\xe2\x99\xa5",
424
+ 'hellip;' => "\xe2\x80\xa6",
425
+ 'iacute' => "\xc3\xad",
426
+ 'iacute;' => "\xc3\xad",
427
+ 'icirc' => "\xc3\xae",
428
+ 'icirc;' => "\xc3\xae",
429
+ 'iexcl' => "\xc2\xa1",
430
+ 'iexcl;' => "\xc2\xa1",
431
+ 'igrave' => "\xc3\xac",
432
+ 'igrave;' => "\xc3\xac",
433
+ 'image;' => "\xe2\x84\x91",
434
+ 'infin;' => "\xe2\x88\x9e",
435
+ 'int;' => "\xe2\x88\xab",
436
+ 'iota;' => "\xce\xb9",
437
+ 'iquest' => "\xc2\xbf",
438
+ 'iquest;' => "\xc2\xbf",
439
+ 'isin;' => "\xe2\x88\x88",
440
+ 'iuml' => "\xc3\xaf",
441
+ 'iuml;' => "\xc3\xaf",
442
+ 'kappa;' => "\xce\xba",
443
+ 'lArr;' => "\xe2\x87\x90",
444
+ 'lambda;' => "\xce\xbb",
445
+ 'lang;' => "\xe3\x80\x88",
446
+ 'laquo' => "\xc2\xab",
447
+ 'laquo;' => "\xc2\xab",
448
+ 'larr;' => "\xe2\x86\x90",
449
+ 'lceil;' => "\xe2\x8c\x88",
450
+ 'ldquo;' => "\xe2\x80\x9c",
451
+ 'le;' => "\xe2\x89\xa4",
452
+ 'lfloor;' => "\xe2\x8c\x8a",
453
+ 'lowast;' => "\xe2\x88\x97",
454
+ 'loz;' => "\xe2\x97\x8a",
455
+ 'lrm;' => "\xe2\x80\x8e",
456
+ 'lsaquo;' => "\xe2\x80\xb9",
457
+ 'lsquo;' => "\xe2\x80\x98",
458
+ 'lt' => '<',
459
+ 'lt;' => '<',
460
+ 'macr' => "\xc2\xaf",
461
+ 'macr;' => "\xc2\xaf",
462
+ 'mdash;' => "\xe2\x80\x94",
463
+ 'micro' => "\xc2\xb5",
464
+ 'micro;' => "\xc2\xb5",
465
+ 'middot' => "\xc2\xb7",
466
+ 'middot;' => "\xc2\xb7",
467
+ 'minus;' => "\xe2\x88\x92",
468
+ 'mu;' => "\xce\xbc",
469
+ 'nabla;' => "\xe2\x88\x87",
470
+ 'nbsp' => "\xc2\xa0",
471
+ 'nbsp;' => "\xc2\xa0",
472
+ 'ndash;' => "\xe2\x80\x93",
473
+ 'ne;' => "\xe2\x89\xa0",
474
+ 'ni;' => "\xe2\x88\x8b",
475
+ 'not' => "\xc2\xac",
476
+ 'not;' => "\xc2\xac",
477
+ 'notin;' => "\xe2\x88\x89",
478
+ 'nsub;' => "\xe2\x8a\x84",
479
+ 'ntilde' => "\xc3\xb1",
480
+ 'ntilde;' => "\xc3\xb1",
481
+ 'nu;' => "\xce\xbd",
482
+ 'oacute' => "\xc3\xb3",
483
+ 'oacute;' => "\xc3\xb3",
484
+ 'ocirc' => "\xc3\xb4",
485
+ 'ocirc;' => "\xc3\xb4",
486
+ 'oelig;' => "\xc5\x93",
487
+ 'ograve' => "\xc3\xb2",
488
+ 'ograve;' => "\xc3\xb2",
489
+ 'oline;' => "\xe2\x80\xbe",
490
+ 'omega;' => "\xcf\x89",
491
+ 'omicron;' => "\xce\xbf",
492
+ 'oplus;' => "\xe2\x8a\x95",
493
+ 'or;' => "\xe2\x88\xa8",
494
+ 'ordf' => "\xc2\xaa",
495
+ 'ordf;' => "\xc2\xaa",
496
+ 'ordm' => "\xc2\xba",
497
+ 'ordm;' => "\xc2\xba",
498
+ 'oslash' => "\xc3\xb8",
499
+ 'oslash;' => "\xc3\xb8",
500
+ 'otilde' => "\xc3\xb5",
501
+ 'otilde;' => "\xc3\xb5",
502
+ 'otimes;' => "\xe2\x8a\x97",
503
+ 'ouml' => "\xc3\xb6",
504
+ 'ouml;' => "\xc3\xb6",
505
+ 'para' => "\xc2\xb6",
506
+ 'para;' => "\xc2\xb6",
507
+ 'part;' => "\xe2\x88\x82",
508
+ 'permil;' => "\xe2\x80\xb0",
509
+ 'perp;' => "\xe2\x8a\xa5",
510
+ 'phi;' => "\xcf\x86",
511
+ 'pi;' => "\xcf\x80",
512
+ 'piv;' => "\xcf\x96",
513
+ 'plusmn' => "\xc2\xb1",
514
+ 'plusmn;' => "\xc2\xb1",
515
+ 'pound' => "\xc2\xa3",
516
+ 'pound;' => "\xc2\xa3",
517
+ 'prime;' => "\xe2\x80\xb2",
518
+ 'prod;' => "\xe2\x88\x8f",
519
+ 'prop;' => "\xe2\x88\x9d",
520
+ 'psi;' => "\xcf\x88",
521
+ 'quot' => '"',
522
+ 'quot;' => '"',
523
+ 'rArr;' => "\xe2\x87\x92",
524
+ 'radic;' => "\xe2\x88\x9a",
525
+ 'rang;' => "\xe3\x80\x89",
526
+ 'raquo' => "\xc2\xbb",
527
+ 'raquo;' => "\xc2\xbb",
528
+ 'rarr;' => "\xe2\x86\x92",
529
+ 'rceil;' => "\xe2\x8c\x89",
530
+ 'rdquo;' => "\xe2\x80\x9d",
531
+ 'real;' => "\xe2\x84\x9c",
532
+ 'reg' => "\xc2\xae",
533
+ 'reg;' => "\xc2\xae",
534
+ 'rfloor;' => "\xe2\x8c\x8b",
535
+ 'rho;' => "\xcf\x81",
536
+ 'rlm;' => "\xe2\x80\x8f",
537
+ 'rsaquo;' => "\xe2\x80\xba",
538
+ 'rsquo;' => "\xe2\x80\x99",
539
+ 'sbquo;' => "\xe2\x80\x9a",
540
+ 'scaron;' => "\xc5\xa1",
541
+ 'sdot;' => "\xe2\x8b\x85",
542
+ 'sect' => "\xc2\xa7",
543
+ 'sect;' => "\xc2\xa7",
544
+ 'shy' => "\xc2\xad",
545
+ 'shy;' => "\xc2\xad",
546
+ 'sigma;' => "\xcf\x83",
547
+ 'sigmaf;' => "\xcf\x82",
548
+ 'sim;' => "\xe2\x88\xbc",
549
+ 'spades;' => "\xe2\x99\xa0",
550
+ 'sub;' => "\xe2\x8a\x82",
551
+ 'sube;' => "\xe2\x8a\x86",
552
+ 'sum;' => "\xe2\x88\x91",
553
+ 'sup1' => "\xc2\xb9",
554
+ 'sup1;' => "\xc2\xb9",
555
+ 'sup2' => "\xc2\xb2",
556
+ 'sup2;' => "\xc2\xb2",
557
+ 'sup3' => "\xc2\xb3",
558
+ 'sup3;' => "\xc2\xb3",
559
+ 'sup;' => "\xe2\x8a\x83",
560
+ 'supe;' => "\xe2\x8a\x87",
561
+ 'szlig' => "\xc3\x9f",
562
+ 'szlig;' => "\xc3\x9f",
563
+ 'tau;' => "\xcf\x84",
564
+ 'there4;' => "\xe2\x88\xb4",
565
+ 'theta;' => "\xce\xb8",
566
+ 'thetasym;' => "\xcf\x91",
567
+ 'thinsp;' => "\xe2\x80\x89",
568
+ 'thorn' => "\xc3\xbe",
569
+ 'thorn;' => "\xc3\xbe",
570
+ 'tilde;' => "\xcb\x9c",
571
+ 'times' => "\xc3\x97",
572
+ 'times;' => "\xc3\x97",
573
+ 'trade;' => "\xe2\x84\xa2",
574
+ 'uArr;' => "\xe2\x87\x91",
575
+ 'uacute' => "\xc3\xba",
576
+ 'uacute;' => "\xc3\xba",
577
+ 'uarr;' => "\xe2\x86\x91",
578
+ 'ucirc' => "\xc3\xbb",
579
+ 'ucirc;' => "\xc3\xbb",
580
+ 'ugrave' => "\xc3\xb9",
581
+ 'ugrave;' => "\xc3\xb9",
582
+ 'uml' => "\xc2\xa8",
583
+ 'uml;' => "\xc2\xa8",
584
+ 'upsih;' => "\xcf\x92",
585
+ 'upsilon;' => "\xcf\x85",
586
+ 'uuml' => "\xc3\xbc",
587
+ 'uuml;' => "\xc3\xbc",
588
+ 'weierp;' => "\xe2\x84\x98",
589
+ 'xi;' => "\xce\xbe",
590
+ 'yacute' => "\xc3\xbd",
591
+ 'yacute;' => "\xc3\xbd",
592
+ 'yen' => "\xc2\xa5",
593
+ 'yen;' => "\xc2\xa5",
594
+ 'yuml' => "\xc3\xbf",
595
+ 'yuml;' => "\xc3\xbf",
596
+ 'zeta;' => "\xce\xb6",
597
+ 'zwj;' => "\xe2\x80\x8d",
598
+ 'zwnj;' => "\xe2\x80\x8c"
599
+ }
600
+
601
+ ENCODINGS = %w[
602
+ ansi_x3.4-1968
603
+ iso-ir-6
604
+ ansi_x3.4-1986
605
+ iso_646.irv:1991
606
+ ascii
607
+ iso646-us
608
+ us-ascii
609
+ us
610
+ ibm367
611
+ cp367
612
+ csascii
613
+ ks_c_5601-1987
614
+ korean
615
+ iso-2022-kr
616
+ csiso2022kr
617
+ euc-kr
618
+ iso-2022-jp
619
+ csiso2022jp
620
+ iso-2022-jp-2
621
+ iso-ir-58
622
+ chinese
623
+ csiso58gb231280
624
+ iso_8859-1:1987
625
+ iso-ir-100
626
+ iso_8859-1
627
+ iso-8859-1
628
+ latin1
629
+ l1
630
+ ibm819
631
+ cp819
632
+ csisolatin1
633
+ iso_8859-2:1987
634
+ iso-ir-101
635
+ iso_8859-2
636
+ iso-8859-2
637
+ latin2
638
+ l2
639
+ csisolatin2
640
+ iso_8859-3:1988
641
+ iso-ir-109
642
+ iso_8859-3
643
+ iso-8859-3
644
+ latin3
645
+ l3
646
+ csisolatin3
647
+ iso_8859-4:1988
648
+ iso-ir-110
649
+ iso_8859-4
650
+ iso-8859-4
651
+ latin4
652
+ l4
653
+ csisolatin4
654
+ iso_8859-6:1987
655
+ iso-ir-127
656
+ iso_8859-6
657
+ iso-8859-6
658
+ ecma-114
659
+ asmo-708
660
+ arabic
661
+ csisolatinarabic
662
+ iso_8859-7:1987
663
+ iso-ir-126
664
+ iso_8859-7
665
+ iso-8859-7
666
+ elot_928
667
+ ecma-118
668
+ greek
669
+ greek8
670
+ csisolatingreek
671
+ iso_8859-8:1988
672
+ iso-ir-138
673
+ iso_8859-8
674
+ iso-8859-8
675
+ hebrew
676
+ csisolatinhebrew
677
+ iso_8859-5:1988
678
+ iso-ir-144
679
+ iso_8859-5
680
+ iso-8859-5
681
+ cyrillic
682
+ csisolatincyrillic
683
+ iso_8859-9:1989
684
+ iso-ir-148
685
+ iso_8859-9
686
+ iso-8859-9
687
+ latin5
688
+ l5
689
+ csisolatin5
690
+ iso-8859-10
691
+ iso-ir-157
692
+ l6
693
+ iso_8859-10:1992
694
+ csisolatin6
695
+ latin6
696
+ hp-roman8
697
+ roman8
698
+ r8
699
+ ibm037
700
+ cp037
701
+ csibm037
702
+ ibm424
703
+ cp424
704
+ csibm424
705
+ ibm437
706
+ cp437
707
+ 437
708
+ cspc8codepage437
709
+ ibm500
710
+ cp500
711
+ csibm500
712
+ ibm775
713
+ cp775
714
+ cspc775baltic
715
+ ibm850
716
+ cp850
717
+ 850
718
+ cspc850multilingual
719
+ ibm852
720
+ cp852
721
+ 852
722
+ cspcp852
723
+ ibm855
724
+ cp855
725
+ 855
726
+ csibm855
727
+ ibm857
728
+ cp857
729
+ 857
730
+ csibm857
731
+ ibm860
732
+ cp860
733
+ 860
734
+ csibm860
735
+ ibm861
736
+ cp861
737
+ 861
738
+ cp-is
739
+ csibm861
740
+ ibm862
741
+ cp862
742
+ 862
743
+ cspc862latinhebrew
744
+ ibm863
745
+ cp863
746
+ 863
747
+ csibm863
748
+ ibm864
749
+ cp864
750
+ csibm864
751
+ ibm865
752
+ cp865
753
+ 865
754
+ csibm865
755
+ ibm866
756
+ cp866
757
+ 866
758
+ csibm866
759
+ ibm869
760
+ cp869
761
+ 869
762
+ cp-gr
763
+ csibm869
764
+ ibm1026
765
+ cp1026
766
+ csibm1026
767
+ koi8-r
768
+ cskoi8r
769
+ koi8-u
770
+ big5-hkscs
771
+ ptcp154
772
+ csptcp154
773
+ pt154
774
+ cp154
775
+ utf-7
776
+ utf-16be
777
+ utf-16le
778
+ utf-16
779
+ utf-8
780
+ iso-8859-13
781
+ iso-8859-14
782
+ iso-ir-199
783
+ iso_8859-14:1998
784
+ iso_8859-14
785
+ latin8
786
+ iso-celtic
787
+ l8
788
+ iso-8859-15
789
+ iso_8859-15
790
+ iso-8859-16
791
+ iso-ir-226
792
+ iso_8859-16:2001
793
+ iso_8859-16
794
+ latin10
795
+ l10
796
+ gbk
797
+ cp936
798
+ ms936
799
+ gb18030
800
+ shift_jis
801
+ ms_kanji
802
+ csshiftjis
803
+ euc-jp
804
+ gb2312
805
+ big5
806
+ csbig5
807
+ windows-1250
808
+ windows-1251
809
+ windows-1252
810
+ windows-1253
811
+ windows-1254
812
+ windows-1255
813
+ windows-1256
814
+ windows-1257
815
+ windows-1258
816
+ tis-620
817
+ hz-gb-2312
818
+ ]
819
+
820
# Table of parse-error messages, keyed by the parser's error code string.
#
# Every message is passed through `_()`, which is defined outside this
# block (presumably a translation/identity hook -- confirm against its
# definition).
#
# Several messages carry Python-style named placeholders such as
# `%(name)` or `%(charAsInt)08x`.
# NOTE(review): Ruby's Kernel#format expects `%{name}` / `%<name>08x`, so
# these templates cannot be fed to String#% as they stand -- confirm how
# consumers of this table interpolate values before changing the syntax.
E = {
  "null-character" =>
    _("Null character in input stream, replaced with U+FFFD."),
  "incorrectly-placed-solidus" =>
    _("Solidus (/) incorrectly placed in tag."),
  "incorrect-cr-newline-entity" =>
    _("Incorrect CR newline entity, replaced with LF."),
  "illegal-windows-1252-entity" =>
    _("Entity used with illegal number (windows-1252 reference)."),
  "cant-convert-numeric-entity" =>
    _("Numeric entity couldn't be converted to character " +
      "(codepoint U+%(charAsInt)08x)."),
  # Fixed: the message previously read "codepoint=> ", a stray hash-rocket
  # inside the string literal.
  "illegal-codepoint-for-numeric-entity" =>
    _("Numeric entity represents an illegal codepoint: " +
      "U+%(charAsInt)08x."),
  "numeric-entity-without-semicolon" =>
    _("Numeric entity didn't end with ';'."),
  "expected-numeric-entity-but-got-eof" =>
    _("Numeric entity expected. Got end of file instead."),
  "expected-numeric-entity" =>
    _("Numeric entity expected but none found."),
  "named-entity-without-semicolon" =>
    _("Named entity didn't end with ';'."),
  "expected-named-entity" =>
    _("Named entity expected. Got none."),
  "attributes-in-end-tag" =>
    _("End tag contains unexpected attributes."),
  "expected-tag-name-but-got-right-bracket" =>
    _("Expected tag name. Got '>' instead."),
  "expected-tag-name-but-got-question-mark" =>
    _("Expected tag name. Got '?' instead. (HTML doesn't " +
      "support processing instructions.)"),
  "expected-tag-name" =>
    _("Expected tag name. Got something else instead"),
  "expected-closing-tag-but-got-right-bracket" =>
    _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
  "expected-closing-tag-but-got-eof" =>
    _("Expected closing tag. Unexpected end of file."),
  "expected-closing-tag-but-got-char" =>
    _("Expected closing tag. Unexpected character '%(data)' found."),
  "eof-in-tag-name" =>
    _("Unexpected end of file in the tag name."),
  "expected-attribute-name-but-got-eof" =>
    _("Unexpected end of file. Expected attribute name instead."),
  "eof-in-attribute-name" =>
    _("Unexpected end of file in attribute name."),
  "duplicate-attribute" =>
    _("Dropped duplicate attribute on tag."),
  "expected-end-of-tag-name-but-got-eof" =>
    _("Unexpected end of file. Expected = or end of tag."),
  "expected-attribute-value-but-got-eof" =>
    _("Unexpected end of file. Expected attribute value."),
  "eof-in-attribute-value-double-quote" =>
    _("Unexpected end of file in attribute value (\")."),
  "eof-in-attribute-value-single-quote" =>
    _("Unexpected end of file in attribute value (')."),
  "eof-in-attribute-value-no-quotes" =>
    _("Unexpected end of file in attribute value."),
  "expected-dashes-or-doctype" =>
    _("Expected '--' or 'DOCTYPE'. Not found."),
  "incorrect-comment" =>
    _("Incorrect comment."),
  "eof-in-comment" =>
    _("Unexpected end of file in comment."),
  "eof-in-comment-end-dash" =>
    _("Unexpected end of file in comment (-)"),
  "unexpected-dash-after-double-dash-in-comment" =>
    _("Unexpected '-' after '--' found in comment."),
  "eof-in-comment-double-dash" =>
    _("Unexpected end of file in comment (--)."),
  "unexpected-char-in-comment" =>
    _("Unexpected character in comment found."),
  "need-space-after-doctype" =>
    _("No space after literal string 'DOCTYPE'."),
  "expected-doctype-name-but-got-right-bracket" =>
    _("Unexpected > character. Expected DOCTYPE name."),
  "expected-doctype-name-but-got-eof" =>
    _("Unexpected end of file. Expected DOCTYPE name."),
  "eof-in-doctype-name" =>
    _("Unexpected end of file in DOCTYPE name."),
  "eof-in-doctype" =>
    _("Unexpected end of file in DOCTYPE."),
  "expected-space-or-right-bracket-in-doctype" =>
    _("Expected space or '>'. Got '%(data)'"),
  "unexpected-end-of-doctype" =>
    _("Unexpected end of DOCTYPE."),
  "unexpected-char-in-doctype" =>
    _("Unexpected character in DOCTYPE."),
  "eof-in-bogus-doctype" =>
    _("Unexpected end of file in bogus doctype."),
  "eof-in-innerhtml" =>
    _("XXX innerHTML EOF"),
  "unexpected-doctype" =>
    _("Unexpected DOCTYPE. Ignored."),
  "non-html-root" =>
    _("html needs to be the first start tag."),
  "expected-doctype-but-got-eof" =>
    _("Unexpected End of file. Expected DOCTYPE."),
  "unknown-doctype" =>
    _("Erroneous DOCTYPE."),
  "expected-doctype-but-got-chars" =>
    _("Unexpected non-space characters. Expected DOCTYPE."),
  "expected-doctype-but-got-start-tag" =>
    _("Unexpected start tag (%(name)). Expected DOCTYPE."),
  "expected-doctype-but-got-end-tag" =>
    _("Unexpected end tag (%(name)). Expected DOCTYPE."),
  "end-tag-after-implied-root" =>
    _("Unexpected end tag (%(name)) after the (implied) root element."),
  "expected-named-closing-tag-but-got-eof" =>
    _("Unexpected end of file. Expected end tag (%(name))."),
  "two-heads-are-not-better-than-one" =>
    _("Unexpected start tag head in existing head. Ignored."),
  "unexpected-end-tag" =>
    _("Unexpected end tag (%(name)). Ignored."),
  "unexpected-start-tag-out-of-my-head" =>
    _("Unexpected start tag (%(name)) that can be in head. Moved."),
  "unexpected-start-tag" =>
    _("Unexpected start tag (%(name))."),
  "missing-end-tag" =>
    _("Missing end tag (%(name))."),
  "missing-end-tags" =>
    _("Missing end tags (%(name))."),
  "unexpected-start-tag-implies-end-tag" =>
    _("Unexpected start tag (%(startName)) " +
      "implies end tag (%(endName))."),
  "unexpected-start-tag-treated-as" =>
    _("Unexpected start tag (%(originalName)). Treated as %(newName)."),
  "deprecated-tag" =>
    _("Unexpected start tag %(name). Don't use it!"),
  "unexpected-start-tag-ignored" =>
    _("Unexpected start tag %(name). Ignored."),
  "expected-one-end-tag-but-got-another" =>
    _("Unexpected end tag (%(gotName)). " +
      "Missing end tag (%(expectedName))."),
  "end-tag-too-early" =>
    _("End tag (%(name)) seen too early. Expected other end tag."),
  "end-tag-too-early-named" =>
    _("Unexpected end tag (%(gotName)). Expected end tag (%(expectedName))."),
  "end-tag-too-early-ignored" =>
    _("End tag (%(name)) seen too early. Ignored."),
  "adoption-agency-1.1" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 1 of the adoption agency algorithm."),
  "adoption-agency-1.2" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 2 of the adoption agency algorithm."),
  "adoption-agency-1.3" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 3 of the adoption agency algorithm."),
  "unexpected-end-tag-treated-as" =>
    _("Unexpected end tag (%(originalName)). Treated as %(newName)."),
  "no-end-tag" =>
    _("This element (%(name)) has no end tag."),
  "unexpected-implied-end-tag-in-table" =>
    _("Unexpected implied end tag (%(name)) in the table phase."),
  "unexpected-implied-end-tag-in-table-body" =>
    _("Unexpected implied end tag (%(name)) in the table body phase."),
  "unexpected-char-implies-table-voodoo" =>
    _("Unexpected non-space characters in " +
      "table context caused voodoo mode."),
  "unexpected-start-tag-implies-table-voodoo" =>
    _("Unexpected start tag (%(name)) in " +
      "table context caused voodoo mode."),
  "unexpected-end-tag-implies-table-voodoo" =>
    _("Unexpected end tag (%(name)) in " +
      "table context caused voodoo mode."),
  "unexpected-cell-in-table-body" =>
    _("Unexpected table cell start tag (%(name)) " +
      "in the table body phase."),
  "unexpected-cell-end-tag" =>
    _("Got table cell end tag (%(name)) " +
      "while required end tags are missing."),
  "unexpected-end-tag-in-table-body" =>
    _("Unexpected end tag (%(name)) in the table body phase. Ignored."),
  "unexpected-implied-end-tag-in-table-row" =>
    _("Unexpected implied end tag (%(name)) in the table row phase."),
  "unexpected-end-tag-in-table-row" =>
    _("Unexpected end tag (%(name)) in the table row phase. Ignored."),
  "unexpected-select-in-select" =>
    _("Unexpected select start tag in the select phase " +
      "implies select start tag."),
  # Fixed: the placeholder's surrounding parenthesis was left unclosed
  # ("(%(name) in the select phase").
  "unexpected-start-tag-in-select" =>
    _("Unexpected start tag token (%(name)) in the select phase. " +
      "Ignored."),
  "unexpected-end-tag-in-select" =>
    _("Unexpected end tag (%(name)) in the select phase. Ignored."),
  "unexpected-char-after-body" =>
    _("Unexpected non-space characters in the after body phase."),
  "unexpected-start-tag-after-body" =>
    _("Unexpected start tag token (%(name))" +
      " in the after body phase."),
  "unexpected-end-tag-after-body" =>
    _("Unexpected end tag token (%(name))" +
      " in the after body phase."),
  # Fixed: "Unepxected" typo in the message text.
  "unexpected-char-in-frameset" =>
    _("Unexpected characters in the frameset phase. Characters ignored."),
  "unexpected-start-tag-in-frameset" =>
    _("Unexpected start tag token (%(name))" +
      " in the frameset phase. Ignored."),
  "unexpected-frameset-in-frameset-innerhtml" =>
    _("Unexpected end tag token (frameset) " +
      "in the frameset phase (innerHTML)."),
  "unexpected-end-tag-in-frameset" =>
    _("Unexpected end tag token (%(name))" +
      " in the frameset phase. Ignored."),
  "unexpected-char-after-frameset" =>
    _("Unexpected non-space characters in the " +
      "after frameset phase. Ignored."),
  "unexpected-start-tag-after-frameset" =>
    _("Unexpected start tag (%(name))" +
      " in the after frameset phase. Ignored."),
  "unexpected-end-tag-after-frameset" =>
    _("Unexpected end tag (%(name))" +
      " in the after frameset phase. Ignored."),
  "expected-eof-but-got-char" =>
    _("Unexpected non-space characters. Expected end of file."),
  "expected-eof-but-got-start-tag" =>
    _("Unexpected start tag (%(name))" +
      ". Expected end of file."),
  "expected-eof-but-got-end-tag" =>
    _("Unexpected end tag (%(name))" +
      ". Expected end of file."),
  "unexpected-end-table-in-caption" =>
    _("Unexpected end table tag in caption. Generates implied end caption.")
}
1045
+
1046
+ end