spk-html5 0.10.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +10 -0
- data/Manifest.txt +73 -0
- data/README +45 -0
- data/Rakefile.rb +33 -0
- data/bin/html5 +7 -0
- data/lib/html5.rb +13 -0
- data/lib/html5/cli.rb +248 -0
- data/lib/html5/constants.rb +1061 -0
- data/lib/html5/filters/base.rb +10 -0
- data/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/html5/filters/iso639codes.rb +755 -0
- data/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/html5/filters/rfc2046.rb +31 -0
- data/lib/html5/filters/rfc3987.rb +91 -0
- data/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/html5/filters/validator.rb +834 -0
- data/lib/html5/filters/whitespace.rb +36 -0
- data/lib/html5/html5parser.rb +247 -0
- data/lib/html5/html5parser/after_after_body_phase.rb +43 -0
- data/lib/html5/html5parser/after_after_frameset_phase.rb +32 -0
- data/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/html5/html5parser/after_head_phase.rb +55 -0
- data/lib/html5/html5parser/before_head_phase.rb +44 -0
- data/lib/html5/html5parser/before_html_phase.rb +41 -0
- data/lib/html5/html5parser/in_body_phase.rb +636 -0
- data/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/html5/html5parser/in_foreign_content_phase.rb +50 -0
- data/lib/html5/html5parser/in_frameset_phase.rb +56 -0
- data/lib/html5/html5parser/in_head_phase.rb +143 -0
- data/lib/html5/html5parser/in_row_phase.rb +96 -0
- data/lib/html5/html5parser/in_select_phase.rb +90 -0
- data/lib/html5/html5parser/in_select_table_phase.rb +35 -0
- data/lib/html5/html5parser/in_table_body_phase.rb +92 -0
- data/lib/html5/html5parser/in_table_phase.rb +177 -0
- data/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/html5/html5parser/phase.rb +171 -0
- data/lib/html5/inputstream.rb +735 -0
- data/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/html5/sanitizer.rb +209 -0
- data/lib/html5/serializer.rb +2 -0
- data/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/html5/sniffer.rb +45 -0
- data/lib/html5/tokenizer.rb +1059 -0
- data/lib/html5/treebuilders.rb +24 -0
- data/lib/html5/treebuilders/base.rb +339 -0
- data/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/html5/treebuilders/rexml.rb +215 -0
- data/lib/html5/treebuilders/simpletree.rb +191 -0
- data/lib/html5/treewalkers.rb +26 -0
- data/lib/html5/treewalkers/base.rb +162 -0
- data/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/html5/version.rb +3 -0
- data/test/preamble.rb +69 -0
- data/test/test_cli.rb +16 -0
- data/test/test_encoding.rb +35 -0
- data/test/test_input_stream.rb +26 -0
- data/test/test_lxp.rb +283 -0
- data/test/test_parser.rb +63 -0
- data/test/test_sanitizer.rb +173 -0
- data/test/test_serializer.rb +67 -0
- data/test/test_sniffer.rb +27 -0
- data/test/test_stream.rb +71 -0
- data/test/test_tokenizer.rb +95 -0
- data/test/test_treewalkers.rb +135 -0
- data/test/test_validator.rb +31 -0
- data/test/tokenizer_test_parser.rb +67 -0
- data/test19.rb +38 -0
- metadata +198 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README
|
4
|
+
Rakefile.rb
|
5
|
+
bin/html5
|
6
|
+
lib/html5.rb
|
7
|
+
lib/html5/cli.rb
|
8
|
+
lib/html5/constants.rb
|
9
|
+
lib/html5/filters/base.rb
|
10
|
+
lib/html5/filters/inject_meta_charset.rb
|
11
|
+
lib/html5/filters/iso639codes.rb
|
12
|
+
lib/html5/filters/optionaltags.rb
|
13
|
+
lib/html5/filters/rfc2046.rb
|
14
|
+
lib/html5/filters/rfc3987.rb
|
15
|
+
lib/html5/filters/sanitizer.rb
|
16
|
+
lib/html5/filters/validator.rb
|
17
|
+
lib/html5/filters/whitespace.rb
|
18
|
+
lib/html5/html5parser.rb
|
19
|
+
lib/html5/html5parser/after_after_body_phase.rb
|
20
|
+
lib/html5/html5parser/after_after_frameset_phase.rb
|
21
|
+
lib/html5/html5parser/after_body_phase.rb
|
22
|
+
lib/html5/html5parser/after_frameset_phase.rb
|
23
|
+
lib/html5/html5parser/after_head_phase.rb
|
24
|
+
lib/html5/html5parser/before_head_phase.rb
|
25
|
+
lib/html5/html5parser/before_html_phase.rb
|
26
|
+
lib/html5/html5parser/in_body_phase.rb
|
27
|
+
lib/html5/html5parser/in_caption_phase.rb
|
28
|
+
lib/html5/html5parser/in_cell_phase.rb
|
29
|
+
lib/html5/html5parser/in_column_group_phase.rb
|
30
|
+
lib/html5/html5parser/in_foreign_content_phase.rb
|
31
|
+
lib/html5/html5parser/in_frameset_phase.rb
|
32
|
+
lib/html5/html5parser/in_head_phase.rb
|
33
|
+
lib/html5/html5parser/in_row_phase.rb
|
34
|
+
lib/html5/html5parser/in_select_phase.rb
|
35
|
+
lib/html5/html5parser/in_select_table_phase.rb
|
36
|
+
lib/html5/html5parser/in_table_body_phase.rb
|
37
|
+
lib/html5/html5parser/in_table_phase.rb
|
38
|
+
lib/html5/html5parser/initial_phase.rb
|
39
|
+
lib/html5/html5parser/phase.rb
|
40
|
+
lib/html5/inputstream.rb
|
41
|
+
lib/html5/liberalxmlparser.rb
|
42
|
+
lib/html5/sanitizer.rb
|
43
|
+
lib/html5/serializer.rb
|
44
|
+
lib/html5/serializer/htmlserializer.rb
|
45
|
+
lib/html5/serializer/xhtmlserializer.rb
|
46
|
+
lib/html5/sniffer.rb
|
47
|
+
lib/html5/tokenizer.rb
|
48
|
+
lib/html5/treebuilders.rb
|
49
|
+
lib/html5/treebuilders/base.rb
|
50
|
+
lib/html5/treebuilders/hpricot.rb
|
51
|
+
lib/html5/treebuilders/rexml.rb
|
52
|
+
lib/html5/treebuilders/simpletree.rb
|
53
|
+
lib/html5/treewalkers.rb
|
54
|
+
lib/html5/treewalkers/base.rb
|
55
|
+
lib/html5/treewalkers/hpricot.rb
|
56
|
+
lib/html5/treewalkers/rexml.rb
|
57
|
+
lib/html5/treewalkers/simpletree.rb
|
58
|
+
lib/html5/version.rb
|
59
|
+
test/preamble.rb
|
60
|
+
test/test_cli.rb
|
61
|
+
test/test_encoding.rb
|
62
|
+
test/test_input_stream.rb
|
63
|
+
test/test_lxp.rb
|
64
|
+
test/test_parser.rb
|
65
|
+
test/test_sanitizer.rb
|
66
|
+
test/test_serializer.rb
|
67
|
+
test/test_sniffer.rb
|
68
|
+
test/test_stream.rb
|
69
|
+
test/test_tokenizer.rb
|
70
|
+
test/test_treewalkers.rb
|
71
|
+
test/test_validator.rb
|
72
|
+
test/tokenizer_test_parser.rb
|
73
|
+
test19.rb
|
data/README
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
html5
|
2
|
+
by Ryan King, et al
|
3
|
+
http://code.google.com/p/html5lib
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
A Ruby implementation of the parsing algorithm in HTML5.
|
8
|
+
|
9
|
+
|
10
|
+
== FEATURES/PROBLEMS:
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
== SYNOPSIS:
|
15
|
+
|
16
|
+
TODO
|
17
|
+
|
18
|
+
== REQUIREMENTS:
|
19
|
+
|
20
|
+
* chardet, only tested with 0.9.0
|
21
|
+
|
22
|
+
== INSTALL:
|
23
|
+
|
24
|
+
* sudo gem install html5
|
25
|
+
|
26
|
+
== LICENSE:
|
27
|
+
|
28
|
+
Copyright (c) 2006-2007 The Authors
|
29
|
+
|
30
|
+
Contributors:
|
31
|
+
James Graham - jg307@cam.ac.uk
|
32
|
+
Anne van Kesteren - annevankesteren@gmail.com
|
33
|
+
Lachlan Hunt - lachlan.hunt@lachy.id.au
|
34
|
+
Matt McDonald - kanashii@kanashii.ca
|
35
|
+
Sam Ruby - rubys@intertwingly.net
|
36
|
+
Ian Hickson (Google) - ian@hixie.ch
|
37
|
+
Thomas Broyer - t.broyer@ltgt.net
|
38
|
+
Jacques Distler - distler@golem.ph.utexas.edu
|
39
|
+
Ryan King - ryan@theryanking.com
|
40
|
+
|
41
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
42
|
+
|
43
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
44
|
+
|
45
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Build script for the spk-html5 gem: declares the Hoe gem specification and,
# when rcov is installed, an aggregate test-coverage task.
require 'rake'
require 'hoe'
# Load the version file relative to this Rakefile. Ruby 1.9.2+ no longer puts
# "." on the load path, so a bare require of 'lib/html5/version' fails there.
require File.expand_path('../lib/html5/version', __FILE__)

Hoe.new("spk-html5", HTML5::VERSION) do |p|
  p.name = "spk-html5"
  p.description = p.paragraphs_of('README', 2).join("\n\n")
  p.summary = "HTML5 parser/tokenizer."

  p.author = ['Ryan King'] # TODO: add more names
  p.email = 'ryan@theryanking.com'
  p.url = 'http://code.google.com/p/html5lib'
  p.need_zip = true

  p.extra_deps << ['rchardet', '>= 1.3']
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
end

# rcov is only needed for the coverage task; a missing rcov must not make the
# packaging and test tasks defined above unusable.
begin
  require 'rcov/rcovtask'
rescue LoadError
  # Coverage task is skipped below when rcov is unavailable.
end

namespace :test do
  namespace :coverage do
    desc "Delete aggregate coverage data."
    task(:clean) { rm_f "coverage.data" }
  end

  if defined?(Rcov::RcovTask)
    desc 'Aggregate code coverage for unit, functional and integration tests'
    Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |t|
      t.libs << "test"
      t.test_files = FileList["test/test_*.rb"]
      t.output_dir = "test/coverage/"
      t.verbose = true
    end
  end
end
|
data/bin/html5
ADDED
data/lib/html5.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'html5/html5parser'
|
2
|
+
require 'html5/version'
|
3
|
+
|
4
|
+
# Top-level convenience entry points for the HTML5 parser.
module HTML5
  class << self
    # Parses +stream+ as a complete document by delegating to
    # HTMLParser.parse, passing +options+ through unchanged.
    def parse(stream, options={})
      HTMLParser.parse(stream, options)
    end

    # Parses +stream+ as a fragment by delegating to
    # HTMLParser.parse_fragment, passing +options+ through unchanged.
    def parse_fragment(stream, options={})
      HTMLParser.parse_fragment(stream, options)
    end
  end
end
|
data/lib/html5/cli.rb
ADDED
@@ -0,0 +1,248 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__), 'lib'
|
2
|
+
require 'html5'
|
3
|
+
require 'ostruct'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
module HTML5::CLI
|
7
|
+
|
8
|
+
# Parses the command-line switches in +argv+ and returns them as an
# OpenStruct. Recognised switches are removed from +argv+ in place, so only
# the positional arguments (the input name) remain afterwards.
#
# The returned struct carries: profile, time, output, treebuilder, error,
# encoding, parsemethod, container (only set by -f) and serializer (a Hash of
# serializer options).
def self.parse_opts argv
  options = OpenStruct.new(
    :profile     => false,
    :time        => false,
    :output      => :html,
    :treebuilder => 'simpletree',
    :error       => false,
    :encoding    => false,
    :parsemethod => :parse,
    :serializer  => {
      :encoding => 'utf-8',
      :omit_optional_tags => false,
      :inject_meta_charset => false
    }
  )

  option_parser = OptionParser.new do |parser|
    # Registers a switch whose value is stored verbatim under +key+ in the
    # serializer options hash.
    serializer_switch = lambda do |switch, description, key|
      parser.on(switch, description) { |value| options.serializer[key] = value }
    end

    parser.separator ""
    parser.separator "Parse Options:"

    parser.on("-b", "--treebuilder NAME") { |name| options.treebuilder = name }

    parser.on("-f", "--fragment CONTAINER", "Parse as a fragment") do |container|
      options.parsemethod = :parse_fragment
      options.container = container if container
    end

    parser.separator ""
    parser.separator "Filter Options:"

    serializer_switch.call("--[no-]inject-meta-charset", "inject <meta charset>", :inject_meta_charset)
    serializer_switch.call("--[no-]strip-whitespace", "strip unnecessary whitespace", :strip_whitespace)
    serializer_switch.call("--[no-]sanitize", "escape unsafe tags", :sanitize)

    parser.separator ""
    parser.separator "Output Options:"

    parser.on("--tree", "output as debug tree") { options.output = :tree }

    parser.on("-x", "--xml", "output as xml") do
      # XML output is always built with the REXML tree builder.
      options.output = :xml
      options.treebuilder = "rexml"
    end

    parser.on("--[no-]html", "Output as html") do |html|
      options.output = (html ? :html : nil)
    end

    parser.on("--hilite", "Output as formatted highlighted code.") { options.output = :hilite }

    parser.on("-e", "--error", "Print a list of parse errors") { |flag| options.error = flag }

    parser.separator ""
    parser.separator "Serialization Options:"

    serializer_switch.call("--[no-]omit-optional-tags", "Omit optional tags", :omit_optional_tags)
    serializer_switch.call("--[no-]quote-attr-values", "Quote attribute values", :quote_attr_values)
    serializer_switch.call("--[no-]use-best-quote-char", "Use best quote character", :use_best_quote_char)
    serializer_switch.call("--quote-char C", "Use specified quote character", :quote_char)
    serializer_switch.call("--[no-]minimize-boolean-attributes", "Minimize boolean attributes", :minimize_boolean_attributes)
    serializer_switch.call("--[no-]use-trailing-solidus", "Use trailing solidus", :use_trailing_solidus)
    serializer_switch.call("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values", :escape_lt_in_attrs)
    serializer_switch.call("--[no-]escape-rcdata", "Escape rcdata element values", :escape_rcdata)

    parser.separator ""
    parser.separator "Other Options:"

    parser.on("-p", "--[no-]profile", "Profile the run") { |flag| options.profile = flag }
    parser.on("-t", "--[no-]time", "Time the run") { |flag| options.time = flag }
    parser.on("-c", "--[no-]encoding", "Print character encoding used") { |flag| options.encoding = flag }

    parser.on_tail("-h", "--help", "Show this message") do
      puts parser
      exit
    end
  end

  option_parser.parse!(argv)
  options
end
|
135
|
+
|
136
|
+
# Resolves the command-line input argument +f+ into something to parse.
#
# * an http:// or https:// URL is fetched with open-uri;
# * "-" selects standard input;
# * anything else is opened as a file for reading.
#
# If opening fails for any reason, +f+ is returned unchanged, so the caller
# receives the original string. When +f+ is nil a usage hint is written to
# standard error and the process exits with status 1.
#
# NOTE(review): the charset reported by the HTTP response is not passed on
# to the caller; the original code stored it in a local that was never read.
def self.open_input f
  unless f
    $stderr.write("No filename provided. Use -h for help\n")
    exit(1)
  end

  begin
    if f =~ %r{\Ahttps?://}
      require 'open-uri'
      f = URI.parse(f).open
    elsif f == '-'
      f = $stdin
    else
      # File.open rather than Kernel#open: the latter treats an argument
      # starting with "|" as a command to run in a subprocess.
      f = File.open(f)
    end
  rescue StandardError
    # Best effort: leave +f+ as the string that was passed in.
  end

  f
end
|
156
|
+
|
157
|
+
# Parses the input named by the last element of +args+ according to +opts+
# (the struct produced by parse_opts) and prints the result.
#
# With opts.profile the parse runs under the profiler and only the profile is
# written (to standard error); with opts.time the parse and print durations
# are reported after the output; otherwise the document is simply printed.
def self.parse(opts, args)
  # No encoding override is determined here, so nil is always passed on.
  encoding = nil

  input = open_input args.last

  require 'html5/treebuilders'
  builder = HTML5::TreeBuilders[opts.treebuilder]

  # XML output uses the liberal XML parser; everything else the HTML parser.
  p =
    if opts.output == :xml
      require 'html5/liberalxmlparser'
      HTML5::XMLParser.new(:tree=>builder)
    else
      require 'html5/html5parser'
      HTML5::HTMLParser.new(:tree=>builder)
    end

  # Fragment parsing takes the container element name as an extra argument.
  args =
    if opts.parsemethod == :parse
      [input, encoding]
    else
      [input, (opts.container || 'div'), encoding]
    end

  if opts.profile
    require 'profiler'
    Profiler__::start_profile
    p.send(opts.parsemethod, *args)
    Profiler__::stop_profile
    Profiler__::print_profile($stderr)
  elsif opts.time
    require 'time' # TODO: switch to benchmark
    started = Time.new
    document = p.send(opts.parsemethod, *args)
    parsed = Time.new
    print_output(p, document, opts)
    printed = Time.new
    puts "\n\nRun took: #{parsed-started}s (plus #{printed-parsed}s to print the output)"
  else
    print_output(p, p.send(opts.parsemethod, *args), opts)
  end
end
|
198
|
+
|
199
|
+
# Writes +document+ to standard output in the format selected by opts.output
# (:xml, :html, :hilite or :tree; any other value prints nothing).
#
# When opts.encoding is set, the character encoding reported by the parser's
# input stream is printed first. When opts.error is set, the parser's
# recorded errors are listed after the document, one per line with their
# line and column.
def self.print_output(parser, document, opts)
  if opts.encoding
    puts "Encoding: #{parser.tokenizer.stream.char_encoding}"
  end

  format = opts.output
  if format == :xml
    print document
  elsif format == :html
    require 'html5/treewalkers'
    walker = HTML5::TreeWalkers[opts.treebuilder].new(document)
    require 'html5/serializer'
    puts HTML5::HTMLSerializer.serialize(walker, opts.serializer)
  elsif format == :hilite
    print document.hilite
  elsif format == :tree
    # A lone document is wrapped so both cases share one loop.
    fragments = document.respond_to?(:each) ? document : [document]
    fragments.each {|fragment| puts parser.tree.testSerializer(fragment)}
  end

  return unless opts.error

  report = []
  parser.errors.each do |pos, errorcode, datavars|
    # Unknown codes fall back to a generic message template.
    template = HTML5::E[errorcode] || 'Unknown error "%(errorcode)"'
    text = PythonicTemplate.new(template).to_s(datavars)
    report << "Line #{pos[0]} Col #{pos[1]} " + text
  end
  $stdout.write("\nParse errors:\n" + report.join("\n")+"\n")
end
|
227
|
+
|
228
|
+
# Renders Python-style format strings in which "%(name)" marks a placeholder.
#
# Placeholders are substituted directly. The format string is never evaluated
# as Ruby code, so text such as "#{...}" or backslash sequences in the format
# is emitted literally.
class PythonicTemplate
  # Matches one "%(name)" placeholder and captures the name.
  PLACEHOLDER = /%\((\w+)\)/

  # format:: the template text; it is copied, so the caller's string is
  #          never modified.
  def initialize format
    @format = format.dup
    @values = {}
  end

  # Returns the format string with every placeholder replaced.
  #
  # vars:: optional Hash of placeholder name (String or Symbol) to value.
  #        Values are remembered across calls on the same template, and are
  #        converted with to_s, so non-String values are accepted.
  #
  # A placeholder with no known value renders as an empty string.
  def to_s(vars=nil)
    vars.each {|var, value| @values[var.to_s] = value} if vars
    @format.gsub(PLACEHOLDER) { @values[$1].to_s }
  end
end
|
243
|
+
|
244
|
+
# Command-line entry point: parses the switches out of ARGV, then parses and
# prints the input named by what remains in ARGV.
def self.run
  # parse_opts strips the recognised switches from ARGV before it is passed
  # on as the positional argument list.
  parse(parse_opts(ARGV), ARGV)
end
|
248
|
+
end
|
@@ -0,0 +1,1061 @@
|
|
1
|
+
module HTML5
|
2
|
+
|
3
|
+
class EOF < Exception; end
|
4
|
+
|
5
|
+
def self._(str); str end
|
6
|
+
|
7
|
+
CONTENT_MODEL_FLAGS = [
|
8
|
+
:PCDATA,
|
9
|
+
:RCDATA,
|
10
|
+
:CDATA,
|
11
|
+
:PLAINTEXT
|
12
|
+
]
|
13
|
+
|
14
|
+
SCOPING_ELEMENTS = %w[
|
15
|
+
applet
|
16
|
+
button
|
17
|
+
caption
|
18
|
+
html
|
19
|
+
marquee
|
20
|
+
object
|
21
|
+
table
|
22
|
+
td
|
23
|
+
th
|
24
|
+
]
|
25
|
+
|
26
|
+
FORMATTING_ELEMENTS = %w[
|
27
|
+
a
|
28
|
+
b
|
29
|
+
big
|
30
|
+
em
|
31
|
+
font
|
32
|
+
i
|
33
|
+
nobr
|
34
|
+
s
|
35
|
+
small
|
36
|
+
strike
|
37
|
+
strong
|
38
|
+
tt
|
39
|
+
u
|
40
|
+
]
|
41
|
+
|
42
|
+
SPECIAL_ELEMENTS = %w[
|
43
|
+
address
|
44
|
+
area
|
45
|
+
base
|
46
|
+
basefont
|
47
|
+
bgsound
|
48
|
+
blockquote
|
49
|
+
body
|
50
|
+
br
|
51
|
+
center
|
52
|
+
col
|
53
|
+
colgroup
|
54
|
+
dd
|
55
|
+
dir
|
56
|
+
div
|
57
|
+
dl
|
58
|
+
dt
|
59
|
+
embed
|
60
|
+
fieldset
|
61
|
+
form
|
62
|
+
frame
|
63
|
+
frameset
|
64
|
+
h1
|
65
|
+
h2
|
66
|
+
h3
|
67
|
+
h4
|
68
|
+
h5
|
69
|
+
h6
|
70
|
+
head
|
71
|
+
hr
|
72
|
+
iframe
|
73
|
+
image
|
74
|
+
img
|
75
|
+
input
|
76
|
+
isindex
|
77
|
+
li
|
78
|
+
link
|
79
|
+
listing
|
80
|
+
menu
|
81
|
+
meta
|
82
|
+
noembed
|
83
|
+
noframes
|
84
|
+
noscript
|
85
|
+
ol
|
86
|
+
optgroup
|
87
|
+
option
|
88
|
+
p
|
89
|
+
param
|
90
|
+
plaintext
|
91
|
+
pre
|
92
|
+
script
|
93
|
+
select
|
94
|
+
spacer
|
95
|
+
style
|
96
|
+
tbody
|
97
|
+
textarea
|
98
|
+
tfoot
|
99
|
+
thead
|
100
|
+
title
|
101
|
+
tr
|
102
|
+
ul
|
103
|
+
wbr
|
104
|
+
]
|
105
|
+
|
106
|
+
SPACE_CHARACTERS = %W[
|
107
|
+
\t
|
108
|
+
\n
|
109
|
+
\x0B
|
110
|
+
\x0C
|
111
|
+
\x20
|
112
|
+
\r
|
113
|
+
]
|
114
|
+
|
115
|
+
TABLE_INSERT_MODE_ELEMENTS = %w[
|
116
|
+
table
|
117
|
+
tbody
|
118
|
+
tfoot
|
119
|
+
thead
|
120
|
+
tr
|
121
|
+
]
|
122
|
+
|
123
|
+
ASCII_LOWERCASE = ('a'..'z').to_a.join('')
|
124
|
+
ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
|
125
|
+
ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
|
126
|
+
DIGITS = '0'..'9'
|
127
|
+
HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
|
128
|
+
|
129
|
+
# Heading elements need to be ordered
|
130
|
+
HEADING_ELEMENTS = %w[
|
131
|
+
h1
|
132
|
+
h2
|
133
|
+
h3
|
134
|
+
h4
|
135
|
+
h5
|
136
|
+
h6
|
137
|
+
]
|
138
|
+
|
139
|
+
# XXX What about event-source and command?
|
140
|
+
VOID_ELEMENTS = %w[
|
141
|
+
base
|
142
|
+
link
|
143
|
+
meta
|
144
|
+
hr
|
145
|
+
br
|
146
|
+
img
|
147
|
+
embed
|
148
|
+
param
|
149
|
+
area
|
150
|
+
col
|
151
|
+
input
|
152
|
+
]
|
153
|
+
|
154
|
+
CDATA_ELEMENTS = %w[title textarea]
|
155
|
+
|
156
|
+
RCDATA_ELEMENTS = %w[
|
157
|
+
style
|
158
|
+
script
|
159
|
+
xmp
|
160
|
+
iframe
|
161
|
+
noembed
|
162
|
+
noframes
|
163
|
+
noscript
|
164
|
+
]
|
165
|
+
|
166
|
+
BOOLEAN_ATTRIBUTES = {
|
167
|
+
:global => %w[irrelevant],
|
168
|
+
'style' => %w[scoped],
|
169
|
+
'img' => %w[ismap],
|
170
|
+
'audio' => %w[autoplay controls],
|
171
|
+
'video' => %w[autoplay controls],
|
172
|
+
'script' => %w[defer async],
|
173
|
+
'details' => %w[open],
|
174
|
+
'datagrid' => %w[multiple disabled],
|
175
|
+
'command' => %w[hidden disabled checked default],
|
176
|
+
'menu' => %w[autosubmit],
|
177
|
+
'fieldset' => %w[disabled readonly],
|
178
|
+
'option' => %w[disabled readonly selected],
|
179
|
+
'optgroup' => %w[disabled readonly],
|
180
|
+
'button' => %w[disabled autofocus],
|
181
|
+
'input' => %w[disabled readonly required autofocus checked ismap],
|
182
|
+
'select' => %w[disabled readonly autofocus multiple],
|
183
|
+
'output' => %w[disabled readonly]
|
184
|
+
|
185
|
+
}
|
186
|
+
|
187
|
+
# entitiesWindows1252 has to be _ordered_ and needs to have an index.
|
188
|
+
ENTITIES_WINDOWS1252 = [
|
189
|
+
8364, # 0x80 0x20AC EURO SIGN
|
190
|
+
65533, # 0x81 UNDEFINED
|
191
|
+
8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
|
192
|
+
402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
|
193
|
+
8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
|
194
|
+
8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
|
195
|
+
8224, # 0x86 0x2020 DAGGER
|
196
|
+
8225, # 0x87 0x2021 DOUBLE DAGGER
|
197
|
+
710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
|
198
|
+
8240, # 0x89 0x2030 PER MILLE SIGN
|
199
|
+
352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
|
200
|
+
8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
201
|
+
338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
|
202
|
+
65533, # 0x8D UNDEFINED
|
203
|
+
381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
|
204
|
+
65533, # 0x8F UNDEFINED
|
205
|
+
65533, # 0x90 UNDEFINED
|
206
|
+
8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
|
207
|
+
8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
|
208
|
+
8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
|
209
|
+
8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
|
210
|
+
8226, # 0x95 0x2022 BULLET
|
211
|
+
8211, # 0x96 0x2013 EN DASH
|
212
|
+
8212, # 0x97 0x2014 EM DASH
|
213
|
+
732, # 0x98 0x02DC SMALL TILDE
|
214
|
+
8482, # 0x99 0x2122 TRADE MARK SIGN
|
215
|
+
353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
|
216
|
+
8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
217
|
+
339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
|
218
|
+
65533, # 0x9D UNDEFINED
|
219
|
+
382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
|
220
|
+
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
|
221
|
+
]
|
222
|
+
|
223
|
+
# ENTITIES was generated from Python using the following code:
|
224
|
+
#
|
225
|
+
# import constants
|
226
|
+
# entities = constants.entities.items()
|
227
|
+
# entities.sort()
|
228
|
+
# list = [ ' '.join([repr(entity), '=>', ord(value)<128 and
|
229
|
+
# repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
|
230
|
+
# for entity, value in entities]
|
231
|
+
# print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
|
232
|
+
|
233
|
+
ENTITIES = {
|
234
|
+
'AElig' => "\xc3\x86",
|
235
|
+
'AElig;' => "\xc3\x86",
|
236
|
+
'AMP' => '&',
|
237
|
+
'AMP;' => '&',
|
238
|
+
'Aacute' => "\xc3\x81",
|
239
|
+
'Aacute;' => "\xc3\x81",
|
240
|
+
'Acirc' => "\xc3\x82",
|
241
|
+
'Acirc;' => "\xc3\x82",
|
242
|
+
'Agrave' => "\xc3\x80",
|
243
|
+
'Agrave;' => "\xc3\x80",
|
244
|
+
'Alpha;' => "\xce\x91",
|
245
|
+
'Aring' => "\xc3\x85",
|
246
|
+
'Aring;' => "\xc3\x85",
|
247
|
+
'Atilde' => "\xc3\x83",
|
248
|
+
'Atilde;' => "\xc3\x83",
|
249
|
+
'Auml' => "\xc3\x84",
|
250
|
+
'Auml;' => "\xc3\x84",
|
251
|
+
'Beta;' => "\xce\x92",
|
252
|
+
'COPY' => "\xc2\xa9",
|
253
|
+
'COPY;' => "\xc2\xa9",
|
254
|
+
'Ccedil' => "\xc3\x87",
|
255
|
+
'Ccedil;' => "\xc3\x87",
|
256
|
+
'Chi;' => "\xce\xa7",
|
257
|
+
'Dagger;' => "\xe2\x80\xa1",
|
258
|
+
'Delta;' => "\xce\x94",
|
259
|
+
'ETH' => "\xc3\x90",
|
260
|
+
'ETH;' => "\xc3\x90",
|
261
|
+
'Eacute' => "\xc3\x89",
|
262
|
+
'Eacute;' => "\xc3\x89",
|
263
|
+
'Ecirc' => "\xc3\x8a",
|
264
|
+
'Ecirc;' => "\xc3\x8a",
|
265
|
+
'Egrave' => "\xc3\x88",
|
266
|
+
'Egrave;' => "\xc3\x88",
|
267
|
+
'Epsilon;' => "\xce\x95",
|
268
|
+
'Eta;' => "\xce\x97",
|
269
|
+
'Euml' => "\xc3\x8b",
|
270
|
+
'Euml;' => "\xc3\x8b",
|
271
|
+
'GT' => '>',
|
272
|
+
'GT;' => '>',
|
273
|
+
'Gamma;' => "\xce\x93",
|
274
|
+
'Iacute' => "\xc3\x8d",
|
275
|
+
'Iacute;' => "\xc3\x8d",
|
276
|
+
'Icirc' => "\xc3\x8e",
|
277
|
+
'Icirc;' => "\xc3\x8e",
|
278
|
+
'Igrave' => "\xc3\x8c",
|
279
|
+
'Igrave;' => "\xc3\x8c",
|
280
|
+
'Iota;' => "\xce\x99",
|
281
|
+
'Iuml' => "\xc3\x8f",
|
282
|
+
'Iuml;' => "\xc3\x8f",
|
283
|
+
'Kappa;' => "\xce\x9a",
|
284
|
+
'LT' => '<',
|
285
|
+
'LT;' => '<',
|
286
|
+
'Lambda;' => "\xce\x9b",
|
287
|
+
'Mu;' => "\xce\x9c",
|
288
|
+
'Ntilde' => "\xc3\x91",
|
289
|
+
'Ntilde;' => "\xc3\x91",
|
290
|
+
'Nu;' => "\xce\x9d",
|
291
|
+
'OElig;' => "\xc5\x92",
|
292
|
+
'Oacute' => "\xc3\x93",
|
293
|
+
'Oacute;' => "\xc3\x93",
|
294
|
+
'Ocirc' => "\xc3\x94",
|
295
|
+
'Ocirc;' => "\xc3\x94",
|
296
|
+
'Ograve' => "\xc3\x92",
|
297
|
+
'Ograve;' => "\xc3\x92",
|
298
|
+
'Omega;' => "\xce\xa9",
|
299
|
+
'Omicron;' => "\xce\x9f",
|
300
|
+
'Oslash' => "\xc3\x98",
|
301
|
+
'Oslash;' => "\xc3\x98",
|
302
|
+
'Otilde' => "\xc3\x95",
|
303
|
+
'Otilde;' => "\xc3\x95",
|
304
|
+
'Ouml' => "\xc3\x96",
|
305
|
+
'Ouml;' => "\xc3\x96",
|
306
|
+
'Phi;' => "\xce\xa6",
|
307
|
+
'Pi;' => "\xce\xa0",
|
308
|
+
'Prime;' => "\xe2\x80\xb3",
|
309
|
+
'Psi;' => "\xce\xa8",
|
310
|
+
'QUOT' => '"',
|
311
|
+
'QUOT;' => '"',
|
312
|
+
'REG' => "\xc2\xae",
|
313
|
+
'REG;' => "\xc2\xae",
|
314
|
+
'Rho;' => "\xce\xa1",
|
315
|
+
'Scaron;' => "\xc5\xa0",
|
316
|
+
'Sigma;' => "\xce\xa3",
|
317
|
+
'THORN' => "\xc3\x9e",
|
318
|
+
'THORN;' => "\xc3\x9e",
|
319
|
+
'TRADE;' => "\xe2\x84\xa2",
|
320
|
+
'Tau;' => "\xce\xa4",
|
321
|
+
'Theta;' => "\xce\x98",
|
322
|
+
'Uacute' => "\xc3\x9a",
|
323
|
+
'Uacute;' => "\xc3\x9a",
|
324
|
+
'Ucirc' => "\xc3\x9b",
|
325
|
+
'Ucirc;' => "\xc3\x9b",
|
326
|
+
'Ugrave' => "\xc3\x99",
|
327
|
+
'Ugrave;' => "\xc3\x99",
|
328
|
+
'Upsilon;' => "\xce\xa5",
|
329
|
+
'Uuml' => "\xc3\x9c",
|
330
|
+
'Uuml;' => "\xc3\x9c",
|
331
|
+
'Xi;' => "\xce\x9e",
|
332
|
+
'Yacute' => "\xc3\x9d",
|
333
|
+
'Yacute;' => "\xc3\x9d",
|
334
|
+
'Yuml;' => "\xc5\xb8",
|
335
|
+
'Zeta;' => "\xce\x96",
|
336
|
+
'aacute' => "\xc3\xa1",
|
337
|
+
'aacute;' => "\xc3\xa1",
|
338
|
+
'acirc' => "\xc3\xa2",
|
339
|
+
'acirc;' => "\xc3\xa2",
|
340
|
+
'acute' => "\xc2\xb4",
|
341
|
+
'acute;' => "\xc2\xb4",
|
342
|
+
'aelig' => "\xc3\xa6",
|
343
|
+
'aelig;' => "\xc3\xa6",
|
344
|
+
'agrave' => "\xc3\xa0",
|
345
|
+
'agrave;' => "\xc3\xa0",
|
346
|
+
'alefsym;' => "\xe2\x84\xb5",
|
347
|
+
'alpha;' => "\xce\xb1",
|
348
|
+
'amp' => '&',
|
349
|
+
'amp;' => '&',
|
350
|
+
'and;' => "\xe2\x88\xa7",
|
351
|
+
'ang;' => "\xe2\x88\xa0",
|
352
|
+
'apos;' => "'",
|
353
|
+
'aring' => "\xc3\xa5",
|
354
|
+
'aring;' => "\xc3\xa5",
|
355
|
+
'asymp;' => "\xe2\x89\x88",
|
356
|
+
'atilde' => "\xc3\xa3",
|
357
|
+
'atilde;' => "\xc3\xa3",
|
358
|
+
'auml' => "\xc3\xa4",
|
359
|
+
'auml;' => "\xc3\xa4",
|
360
|
+
'bdquo;' => "\xe2\x80\x9e",
|
361
|
+
'beta;' => "\xce\xb2",
|
362
|
+
'brvbar' => "\xc2\xa6",
|
363
|
+
'brvbar;' => "\xc2\xa6",
|
364
|
+
'bull;' => "\xe2\x80\xa2",
|
365
|
+
'cap;' => "\xe2\x88\xa9",
|
366
|
+
'ccedil' => "\xc3\xa7",
|
367
|
+
'ccedil;' => "\xc3\xa7",
|
368
|
+
'cedil' => "\xc2\xb8",
|
369
|
+
'cedil;' => "\xc2\xb8",
|
370
|
+
'cent' => "\xc2\xa2",
|
371
|
+
'cent;' => "\xc2\xa2",
|
372
|
+
'chi;' => "\xcf\x87",
|
373
|
+
'circ;' => "\xcb\x86",
|
374
|
+
'clubs;' => "\xe2\x99\xa3",
|
375
|
+
'cong;' => "\xe2\x89\x85",
|
376
|
+
'copy' => "\xc2\xa9",
|
377
|
+
'copy;' => "\xc2\xa9",
|
378
|
+
'crarr;' => "\xe2\x86\xb5",
|
379
|
+
'cup;' => "\xe2\x88\xaa",
|
380
|
+
'curren' => "\xc2\xa4",
|
381
|
+
'curren;' => "\xc2\xa4",
|
382
|
+
'dArr;' => "\xe2\x87\x93",
|
383
|
+
'dagger;' => "\xe2\x80\xa0",
|
384
|
+
'darr;' => "\xe2\x86\x93",
|
385
|
+
'deg' => "\xc2\xb0",
|
386
|
+
'deg;' => "\xc2\xb0",
|
387
|
+
'delta;' => "\xce\xb4",
|
388
|
+
'diams;' => "\xe2\x99\xa6",
|
389
|
+
'divide' => "\xc3\xb7",
|
390
|
+
'divide;' => "\xc3\xb7",
|
391
|
+
'eacute' => "\xc3\xa9",
|
392
|
+
'eacute;' => "\xc3\xa9",
|
393
|
+
'ecirc' => "\xc3\xaa",
|
394
|
+
'ecirc;' => "\xc3\xaa",
|
395
|
+
'egrave' => "\xc3\xa8",
|
396
|
+
'egrave;' => "\xc3\xa8",
|
397
|
+
'empty;' => "\xe2\x88\x85",
|
398
|
+
'emsp;' => "\xe2\x80\x83",
|
399
|
+
'ensp;' => "\xe2\x80\x82",
|
400
|
+
'epsilon;' => "\xce\xb5",
|
401
|
+
'equiv;' => "\xe2\x89\xa1",
|
402
|
+
'eta;' => "\xce\xb7",
|
403
|
+
'eth' => "\xc3\xb0",
|
404
|
+
'eth;' => "\xc3\xb0",
|
405
|
+
'euml' => "\xc3\xab",
|
406
|
+
'euml;' => "\xc3\xab",
|
407
|
+
'euro;' => "\xe2\x82\xac",
|
408
|
+
'exist;' => "\xe2\x88\x83",
|
409
|
+
'fnof;' => "\xc6\x92",
|
410
|
+
'forall;' => "\xe2\x88\x80",
|
411
|
+
'frac12' => "\xc2\xbd",
|
412
|
+
'frac12;' => "\xc2\xbd",
|
413
|
+
'frac14' => "\xc2\xbc",
|
414
|
+
'frac14;' => "\xc2\xbc",
|
415
|
+
'frac34' => "\xc2\xbe",
|
416
|
+
'frac34;' => "\xc2\xbe",
|
417
|
+
'frasl;' => "\xe2\x81\x84",
|
418
|
+
'gamma;' => "\xce\xb3",
|
419
|
+
'ge;' => "\xe2\x89\xa5",
|
420
|
+
'gt' => '>',
|
421
|
+
'gt;' => '>',
|
422
|
+
'hArr;' => "\xe2\x87\x94",
|
423
|
+
'harr;' => "\xe2\x86\x94",
|
424
|
+
'hearts;' => "\xe2\x99\xa5",
|
425
|
+
'hellip;' => "\xe2\x80\xa6",
|
426
|
+
'iacute' => "\xc3\xad",
|
427
|
+
'iacute;' => "\xc3\xad",
|
428
|
+
'icirc' => "\xc3\xae",
|
429
|
+
'icirc;' => "\xc3\xae",
|
430
|
+
'iexcl' => "\xc2\xa1",
|
431
|
+
'iexcl;' => "\xc2\xa1",
|
432
|
+
'igrave' => "\xc3\xac",
|
433
|
+
'igrave;' => "\xc3\xac",
|
434
|
+
'image;' => "\xe2\x84\x91",
|
435
|
+
'infin;' => "\xe2\x88\x9e",
|
436
|
+
'int;' => "\xe2\x88\xab",
|
437
|
+
'iota;' => "\xce\xb9",
|
438
|
+
'iquest' => "\xc2\xbf",
|
439
|
+
'iquest;' => "\xc2\xbf",
|
440
|
+
'isin;' => "\xe2\x88\x88",
|
441
|
+
'iuml' => "\xc3\xaf",
|
442
|
+
'iuml;' => "\xc3\xaf",
|
443
|
+
'kappa;' => "\xce\xba",
|
444
|
+
'lArr;' => "\xe2\x87\x90",
|
445
|
+
'lambda;' => "\xce\xbb",
|
446
|
+
'lang;' => "\xe2\x9f\xa8",
|
447
|
+
'laquo' => "\xc2\xab",
|
448
|
+
'laquo;' => "\xc2\xab",
|
449
|
+
'larr;' => "\xe2\x86\x90",
|
450
|
+
'lceil;' => "\xe2\x8c\x88",
|
451
|
+
'ldquo;' => "\xe2\x80\x9c",
|
452
|
+
'le;' => "\xe2\x89\xa4",
|
453
|
+
'lfloor;' => "\xe2\x8c\x8a",
|
454
|
+
'lowast;' => "\xe2\x88\x97",
|
455
|
+
'loz;' => "\xe2\x97\x8a",
|
456
|
+
'lrm;' => "\xe2\x80\x8e",
|
457
|
+
'lsaquo;' => "\xe2\x80\xb9",
|
458
|
+
'lsquo;' => "\xe2\x80\x98",
|
459
|
+
'lt' => '<',
|
460
|
+
'lt;' => '<',
|
461
|
+
'macr' => "\xc2\xaf",
|
462
|
+
'macr;' => "\xc2\xaf",
|
463
|
+
'mdash;' => "\xe2\x80\x94",
|
464
|
+
'micro' => "\xc2\xb5",
|
465
|
+
'micro;' => "\xc2\xb5",
|
466
|
+
'middot' => "\xc2\xb7",
|
467
|
+
'middot;' => "\xc2\xb7",
|
468
|
+
'minus;' => "\xe2\x88\x92",
|
469
|
+
'mu;' => "\xce\xbc",
|
470
|
+
'nabla;' => "\xe2\x88\x87",
|
471
|
+
'nbsp' => "\xc2\xa0",
|
472
|
+
'nbsp;' => "\xc2\xa0",
|
473
|
+
'ndash;' => "\xe2\x80\x93",
|
474
|
+
'ne;' => "\xe2\x89\xa0",
|
475
|
+
'ni;' => "\xe2\x88\x8b",
|
476
|
+
'not' => "\xc2\xac",
|
477
|
+
'not;' => "\xc2\xac",
|
478
|
+
'notin;' => "\xe2\x88\x89",
|
479
|
+
'nsub;' => "\xe2\x8a\x84",
|
480
|
+
'ntilde' => "\xc3\xb1",
|
481
|
+
'ntilde;' => "\xc3\xb1",
|
482
|
+
'nu;' => "\xce\xbd",
|
483
|
+
'oacute' => "\xc3\xb3",
|
484
|
+
'oacute;' => "\xc3\xb3",
|
485
|
+
'ocirc' => "\xc3\xb4",
|
486
|
+
'ocirc;' => "\xc3\xb4",
|
487
|
+
'oelig;' => "\xc5\x93",
|
488
|
+
'ograve' => "\xc3\xb2",
|
489
|
+
'ograve;' => "\xc3\xb2",
|
490
|
+
'oline;' => "\xe2\x80\xbe",
|
491
|
+
'omega;' => "\xcf\x89",
|
492
|
+
'omicron;' => "\xce\xbf",
|
493
|
+
'oplus;' => "\xe2\x8a\x95",
|
494
|
+
'or;' => "\xe2\x88\xa8",
|
495
|
+
'ordf' => "\xc2\xaa",
|
496
|
+
'ordf;' => "\xc2\xaa",
|
497
|
+
'ordm' => "\xc2\xba",
|
498
|
+
'ordm;' => "\xc2\xba",
|
499
|
+
'oslash' => "\xc3\xb8",
|
500
|
+
'oslash;' => "\xc3\xb8",
|
501
|
+
'otilde' => "\xc3\xb5",
|
502
|
+
'otilde;' => "\xc3\xb5",
|
503
|
+
'otimes;' => "\xe2\x8a\x97",
|
504
|
+
'ouml' => "\xc3\xb6",
|
505
|
+
'ouml;' => "\xc3\xb6",
|
506
|
+
'para' => "\xc2\xb6",
|
507
|
+
'para;' => "\xc2\xb6",
|
508
|
+
'part;' => "\xe2\x88\x82",
|
509
|
+
'permil;' => "\xe2\x80\xb0",
|
510
|
+
'perp;' => "\xe2\x8a\xa5",
|
511
|
+
'phi;' => "\xcf\x86",
|
512
|
+
'pi;' => "\xcf\x80",
|
513
|
+
'piv;' => "\xcf\x96",
|
514
|
+
'plusmn' => "\xc2\xb1",
|
515
|
+
'plusmn;' => "\xc2\xb1",
|
516
|
+
'pound' => "\xc2\xa3",
|
517
|
+
'pound;' => "\xc2\xa3",
|
518
|
+
'prime;' => "\xe2\x80\xb2",
|
519
|
+
'prod;' => "\xe2\x88\x8f",
|
520
|
+
'prop;' => "\xe2\x88\x9d",
|
521
|
+
'psi;' => "\xcf\x88",
|
522
|
+
'quot' => '"',
|
523
|
+
'quot;' => '"',
|
524
|
+
'rArr;' => "\xe2\x87\x92",
|
525
|
+
'radic;' => "\xe2\x88\x9a",
|
526
|
+
'rang;' => "\xe2\x9f\xa9",
|
527
|
+
'raquo' => "\xc2\xbb",
|
528
|
+
'raquo;' => "\xc2\xbb",
|
529
|
+
'rarr;' => "\xe2\x86\x92",
|
530
|
+
'rceil;' => "\xe2\x8c\x89",
|
531
|
+
'rdquo;' => "\xe2\x80\x9d",
|
532
|
+
'real;' => "\xe2\x84\x9c",
|
533
|
+
'reg' => "\xc2\xae",
|
534
|
+
'reg;' => "\xc2\xae",
|
535
|
+
'rfloor;' => "\xe2\x8c\x8b",
|
536
|
+
'rho;' => "\xcf\x81",
|
537
|
+
'rlm;' => "\xe2\x80\x8f",
|
538
|
+
'rsaquo;' => "\xe2\x80\xba",
|
539
|
+
'rsquo;' => "\xe2\x80\x99",
|
540
|
+
'sbquo;' => "\xe2\x80\x9a",
|
541
|
+
'scaron;' => "\xc5\xa1",
|
542
|
+
'sdot;' => "\xe2\x8b\x85",
|
543
|
+
'sect' => "\xc2\xa7",
|
544
|
+
'sect;' => "\xc2\xa7",
|
545
|
+
'shy' => "\xc2\xad",
|
546
|
+
'shy;' => "\xc2\xad",
|
547
|
+
'sigma;' => "\xcf\x83",
|
548
|
+
'sigmaf;' => "\xcf\x82",
|
549
|
+
'sim;' => "\xe2\x88\xbc",
|
550
|
+
'spades;' => "\xe2\x99\xa0",
|
551
|
+
'sub;' => "\xe2\x8a\x82",
|
552
|
+
'sube;' => "\xe2\x8a\x86",
|
553
|
+
'sum;' => "\xe2\x88\x91",
|
554
|
+
'sup1' => "\xc2\xb9",
|
555
|
+
'sup1;' => "\xc2\xb9",
|
556
|
+
'sup2' => "\xc2\xb2",
|
557
|
+
'sup2;' => "\xc2\xb2",
|
558
|
+
'sup3' => "\xc2\xb3",
|
559
|
+
'sup3;' => "\xc2\xb3",
|
560
|
+
'sup;' => "\xe2\x8a\x83",
|
561
|
+
'supe;' => "\xe2\x8a\x87",
|
562
|
+
'szlig' => "\xc3\x9f",
|
563
|
+
'szlig;' => "\xc3\x9f",
|
564
|
+
'tau;' => "\xcf\x84",
|
565
|
+
'there4;' => "\xe2\x88\xb4",
|
566
|
+
'theta;' => "\xce\xb8",
|
567
|
+
'thetasym;' => "\xcf\x91",
|
568
|
+
'thinsp;' => "\xe2\x80\x89",
|
569
|
+
'thorn' => "\xc3\xbe",
|
570
|
+
'thorn;' => "\xc3\xbe",
|
571
|
+
'tilde;' => "\xcb\x9c",
|
572
|
+
'times' => "\xc3\x97",
|
573
|
+
'times;' => "\xc3\x97",
|
574
|
+
'trade;' => "\xe2\x84\xa2",
|
575
|
+
'uArr;' => "\xe2\x87\x91",
|
576
|
+
'uacute' => "\xc3\xba",
|
577
|
+
'uacute;' => "\xc3\xba",
|
578
|
+
'uarr;' => "\xe2\x86\x91",
|
579
|
+
'ucirc' => "\xc3\xbb",
|
580
|
+
'ucirc;' => "\xc3\xbb",
|
581
|
+
'ugrave' => "\xc3\xb9",
|
582
|
+
'ugrave;' => "\xc3\xb9",
|
583
|
+
'uml' => "\xc2\xa8",
|
584
|
+
'uml;' => "\xc2\xa8",
|
585
|
+
'upsih;' => "\xcf\x92",
|
586
|
+
'upsilon;' => "\xcf\x85",
|
587
|
+
'uuml' => "\xc3\xbc",
|
588
|
+
'uuml;' => "\xc3\xbc",
|
589
|
+
'weierp;' => "\xe2\x84\x98",
|
590
|
+
'xi;' => "\xce\xbe",
|
591
|
+
'yacute' => "\xc3\xbd",
|
592
|
+
'yacute;' => "\xc3\xbd",
|
593
|
+
'yen' => "\xc2\xa5",
|
594
|
+
'yen;' => "\xc2\xa5",
|
595
|
+
'yuml' => "\xc3\xbf",
|
596
|
+
'yuml;' => "\xc3\xbf",
|
597
|
+
'zeta;' => "\xce\xb6",
|
598
|
+
'zwj;' => "\xe2\x80\x8d",
|
599
|
+
'zwnj;' => "\xe2\x80\x8c"
|
600
|
+
}
|
601
|
+
|
602
|
+
# Character-encoding labels, all lower case, as a flat array of strings.
# Aliases of the same encoding are kept adjacent and the overall order is
# unchanged. (No comments may appear inside the %w literal: a '#' there
# would become an array element.)
ENCODINGS = %w[
  ansi_x3.4-1968 iso-ir-6 ansi_x3.4-1986 iso_646.irv:1991 ascii
  iso646-us us-ascii us ibm367 cp367 csascii
  ks_c_5601-1987 korean
  iso-2022-kr csiso2022kr
  euc-kr
  iso-2022-jp csiso2022jp
  iso-2022-jp-2
  iso-ir-58 chinese csiso58gb231280
  iso_8859-1:1987 iso-ir-100 iso_8859-1 iso-8859-1 latin1 l1
  ibm819 cp819 csisolatin1
  iso_8859-2:1987 iso-ir-101 iso_8859-2 iso-8859-2 latin2 l2 csisolatin2
  iso_8859-3:1988 iso-ir-109 iso_8859-3 iso-8859-3 latin3 l3 csisolatin3
  iso_8859-4:1988 iso-ir-110 iso_8859-4 iso-8859-4 latin4 l4 csisolatin4
  iso_8859-6:1987 iso-ir-127 iso_8859-6 iso-8859-6 ecma-114 asmo-708
  arabic csisolatinarabic
  iso_8859-7:1987 iso-ir-126 iso_8859-7 iso-8859-7 elot_928 ecma-118
  greek greek8 csisolatingreek
  iso_8859-8:1988 iso-ir-138 iso_8859-8 iso-8859-8 hebrew csisolatinhebrew
  iso_8859-5:1988 iso-ir-144 iso_8859-5 iso-8859-5 cyrillic
  csisolatincyrillic
  iso_8859-9:1989 iso-ir-148 iso_8859-9 iso-8859-9 latin5 l5 csisolatin5
  iso-8859-10 iso-ir-157 l6 iso_8859-10:1992 csisolatin6 latin6
  hp-roman8 roman8 r8
  ibm037 cp037 csibm037
  ibm424 cp424 csibm424
  ibm437 cp437 437 cspc8codepage437
  ibm500 cp500 csibm500
  ibm775 cp775 cspc775baltic
  ibm850 cp850 850 cspc850multilingual
  ibm852 cp852 852 cspcp852
  ibm855 cp855 855 csibm855
  ibm857 cp857 857 csibm857
  ibm860 cp860 860 csibm860
  ibm861 cp861 861 cp-is csibm861
  ibm862 cp862 862 cspc862latinhebrew
  ibm863 cp863 863 csibm863
  ibm864 cp864 csibm864
  ibm865 cp865 865 csibm865
  ibm866 cp866 866 csibm866
  ibm869 cp869 869 cp-gr csibm869
  ibm1026 cp1026 csibm1026
  koi8-r cskoi8r
  koi8-u
  big5-hkscs
  ptcp154 csptcp154 pt154 cp154
  utf-7 utf-16be utf-16le utf-16 utf-8
  iso-8859-13
  iso-8859-14 iso-ir-199 iso_8859-14:1998 iso_8859-14 latin8 iso-celtic l8
  iso-8859-15 iso_8859-15
  iso-8859-16 iso-ir-226 iso_8859-16:2001 iso_8859-16 latin10 l10
  gbk cp936 ms936
  gb18030
  shift_jis ms_kanji csshiftjis
  euc-jp
  gb2312
  big5 csbig5
  windows-1250 windows-1251 windows-1252 windows-1253 windows-1254
  windows-1255 windows-1256 windows-1257 windows-1258
  tis-620
  hz-gb-2312
]
|
820
|
+
|
821
|
+
# Parse-error catalogue: maps an error code (String) to its human-readable
# message template.
#
# Every message is wrapped in _(), which is defined outside this block
# (presumably a translation hook -- confirm against its definition).
# Placeholders are written %(key); the code that substitutes them is not
# visible here.
# NOTE(review): the two %(charAsInt)08x placeholders carry a printf-style
# width/conversion suffix that no other placeholder has -- confirm that the
# formatter consumes it rather than printing "08x" literally.
E = {
  "null-character" =>
    _("Null character in input stream, replaced with U+FFFD."),
  "incorrectly-placed-solidus" =>
    _("Solidus (/) incorrectly placed in tag."),
  "incorrect-cr-newline-entity" =>
    _("Incorrect CR newline entity, replaced with LF."),
  "illegal-windows-1252-entity" =>
    _("Entity used with illegal number (windows-1252 reference)."),
  "cant-convert-numeric-entity" =>
    _("Numeric entity couldn't be converted to character " +
      "(codepoint U+%(charAsInt)08x)."),
  # The message previously read "codepoint=> ", a hash-rocket that had
  # leaked into the sentence where a colon belongs.
  "illegal-codepoint-for-numeric-entity" =>
    _("Numeric entity represents an illegal codepoint: " +
      "U+%(charAsInt)08x."),
  "numeric-entity-without-semicolon" =>
    _("Numeric entity didn't end with ';'."),
  "expected-numeric-entity-but-got-eof" =>
    _("Numeric entity expected. Got end of file instead."),
  "expected-numeric-entity" =>
    _("Numeric entity expected but none found."),
  "named-entity-without-semicolon" =>
    _("Named entity didn't end with ';'."),
  "expected-named-entity" =>
    _("Named entity expected. Got none."),
  "attributes-in-end-tag" =>
    _("End tag contains unexpected attributes."),
  "expected-tag-name-but-got-right-bracket" =>
    _("Expected tag name. Got '>' instead."),
  "expected-tag-name-but-got-question-mark" =>
    _("Expected tag name. Got '?' instead. (HTML doesn't " +
      "support processing instructions.)"),
  "expected-tag-name" =>
    _("Expected tag name. Got something else instead"),
  "expected-closing-tag-but-got-right-bracket" =>
    _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
  "expected-closing-tag-but-got-eof" =>
    _("Expected closing tag. Unexpected end of file."),
  "expected-closing-tag-but-got-char" =>
    _("Expected closing tag. Unexpected character '%(data)' found."),
  "eof-in-tag-name" =>
    _("Unexpected end of file in the tag name."),
  "expected-attribute-name-but-got-eof" =>
    _("Unexpected end of file. Expected attribute name instead."),
  "eof-in-attribute-name" =>
    _("Unexpected end of file in attribute name."),
  "duplicate-attribute" =>
    _("Dropped duplicate attribute on tag."),
  "expected-end-of-tag-name-but-got-eof" =>
    _("Unexpected end of file. Expected = or end of tag."),
  "expected-attribute-value-but-got-eof" =>
    _("Unexpected end of file. Expected attribute value."),
  "eof-in-attribute-value-double-quote" =>
    _("Unexpected end of file in attribute value (\")."),
  "eof-in-attribute-value-single-quote" =>
    _("Unexpected end of file in attribute value (')."),
  "eof-in-attribute-value-no-quotes" =>
    _("Unexpected end of file in attribute value."),
  "expected-dashes-or-doctype" =>
    _("Expected '--' or 'DOCTYPE'. Not found."),
  "incorrect-comment" =>
    _("Incorrect comment."),
  "eof-in-comment" =>
    _("Unexpected end of file in comment."),
  "eof-in-comment-end-dash" =>
    _("Unexpected end of file in comment (-)"),
  "unexpected-dash-after-double-dash-in-comment" =>
    _("Unexpected '-' after '--' found in comment."),
  "eof-in-comment-double-dash" =>
    _("Unexpected end of file in comment (--)."),
  "unexpected-char-in-comment" =>
    _("Unexpected character in comment found."),
  "need-space-after-doctype" =>
    _("No space after literal string 'DOCTYPE'."),
  "expected-doctype-name-but-got-right-bracket" =>
    _("Unexpected > character. Expected DOCTYPE name."),
  "expected-doctype-name-but-got-eof" =>
    _("Unexpected end of file. Expected DOCTYPE name."),
  "eof-in-doctype-name" =>
    _("Unexpected end of file in DOCTYPE name."),
  "eof-in-doctype" =>
    _("Unexpected end of file in DOCTYPE."),
  "expected-space-or-right-bracket-in-doctype" =>
    _("Expected space or '>'. Got '%(data)'"),
  "unexpected-end-of-doctype" =>
    _("Unexpected end of DOCTYPE."),
  "unexpected-char-in-doctype" =>
    _("Unexpected character in DOCTYPE."),
  "eof-in-bogus-doctype" =>
    _("Unexpected end of file in bogus doctype."),
  "eof-in-innerhtml" =>
    _("Unexpected EOF in inner html mode."),
  "unexpected-doctype" =>
    _("Unexpected DOCTYPE. Ignored."),
  "non-html-root" =>
    _("html needs to be the first start tag."),
  "expected-doctype-but-got-eof" =>
    _("Unexpected End of file. Expected DOCTYPE."),
  "unknown-doctype" =>
    _("Erroneous DOCTYPE."),
  "expected-doctype-but-got-chars" =>
    _("Unexpected non-space characters. Expected DOCTYPE."),
  "expected-doctype-but-got-start-tag" =>
    _("Unexpected start tag (%(name)). Expected DOCTYPE."),
  "expected-doctype-but-got-end-tag" =>
    _("Unexpected end tag (%(name)). Expected DOCTYPE."),
  "end-tag-after-implied-root" =>
    _("Unexpected end tag (%(name)) after the (implied) root element."),
  "expected-named-closing-tag-but-got-eof" =>
    _("Unexpected end of file. Expected end tag (%(name))."),
  "two-heads-are-not-better-than-one" =>
    _("Unexpected start tag head in existing head. Ignored."),
  "unexpected-end-tag" =>
    _("Unexpected end tag (%(name)). Ignored."),
  "unexpected-start-tag-out-of-my-head" =>
    _("Unexpected start tag (%(name)) that can be in head. Moved."),
  "unexpected-start-tag" =>
    _("Unexpected start tag (%(name))."),
  "missing-end-tag" =>
    _("Missing end tag (%(name))."),
  "missing-end-tags" =>
    _("Missing end tags (%(name))."),
  "unexpected-start-tag-implies-end-tag" =>
    _("Unexpected start tag (%(startName)) " +
      "implies end tag (%(endName))."),
  "unexpected-start-tag-treated-as" =>
    _("Unexpected start tag (%(originalName)). Treated as %(newName)."),
  "deprecated-tag" =>
    _("Unexpected start tag %(name). Don't use it!"),
  "unexpected-start-tag-ignored" =>
    _("Unexpected start tag %(name). Ignored."),
  "expected-one-end-tag-but-got-another" =>
    _("Unexpected end tag (%(gotName)). " +
      "Missing end tag (%(expectedName))."),
  "end-tag-too-early" =>
    _("End tag (%(name)) seen too early. Expected other end tag."),
  "end-tag-too-early-named" =>
    _("Unexpected end tag (%(gotName)). Expected end tag (%(expectedName))."),
  "end-tag-too-early-ignored" =>
    _("End tag (%(name)) seen too early. Ignored."),
  "adoption-agency-1.1" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 1 of the adoption agency algorithm."),
  "adoption-agency-1.2" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 2 of the adoption agency algorithm."),
  "adoption-agency-1.3" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 3 of the adoption agency algorithm."),
  "unexpected-end-tag-treated-as" =>
    _("Unexpected end tag (%(originalName)). Treated as %(newName)."),
  "no-end-tag" =>
    _("This element (%(name)) has no end tag."),
  "unexpected-implied-end-tag-in-table" =>
    _("Unexpected implied end tag (%(name)) in the table phase."),
  "unexpected-implied-end-tag-in-table-body" =>
    _("Unexpected implied end tag (%(name)) in the table body phase."),
  "unexpected-char-implies-table-voodoo" =>
    _("Unexpected non-space characters in " +
      "table context caused voodoo mode."),
  # The misspelled key ("unpexted") is retained in case callers look the
  # message up under that spelling; the correctly spelled alias below maps
  # to the same text.
  "unpexted-hidden-input-in-table" =>
    _("Unexpected input with type hidden in table context."),
  "unexpected-hidden-input-in-table" =>
    _("Unexpected input with type hidden in table context."),
  "unexpected-start-tag-implies-table-voodoo" =>
    _("Unexpected start tag (%(name)) in " +
      "table context caused voodoo mode."),
  "unexpected-end-tag-implies-table-voodoo" =>
    _("Unexpected end tag (%(name)) in " +
      "table context caused voodoo mode."),
  "unexpected-cell-in-table-body" =>
    _("Unexpected table cell start tag (%(name)) " +
      "in the table body phase."),
  "unexpected-cell-end-tag" =>
    _("Got table cell end tag (%(name)) " +
      "while required end tags are missing."),
  "unexpected-end-tag-in-table-body" =>
    _("Unexpected end tag (%(name)) in the table body phase. Ignored."),
  "unexpected-implied-end-tag-in-table-row" =>
    _("Unexpected implied end tag (%(name)) in the table row phase."),
  "unexpected-end-tag-in-table-row" =>
    _("Unexpected end tag (%(name)) in the table row phase. Ignored."),
  "unexpected-select-in-select" =>
    _("Unexpected select start tag in the select phase " +
      "treated as select end tag."),
  "unexpected-input-in-select" =>
    _("Unexpected input start tag in the select phase."),
  "unexpected-start-tag-in-select" =>
    _("Unexpected start tag token (%(name)) in the select phase. " +
      "Ignored."),
  "unexpected-end-tag-in-select" =>
    _("Unexpected end tag (%(name)) in the select phase. Ignored."),
  # These two placeholders were written "%(name)s"; the trailing "s" is
  # dropped so they match the bare %(name) form used by every other entry.
  "unexpected-table-element-start-tag-in-select-in-table" =>
    _("Unexpected table element start tag (%(name)) in the select in table phase."),
  "unexpected-table-element-end-tag-in-select-in-table" =>
    _("Unexpected table element end tag (%(name)) in the select in table phase."),
  "unexpected-char-after-body" =>
    _("Unexpected non-space characters in the after body phase."),
  "unexpected-start-tag-after-body" =>
    _("Unexpected start tag token (%(name))" +
      " in the after body phase."),
  "unexpected-end-tag-after-body" =>
    _("Unexpected end tag token (%(name))" +
      " in the after body phase."),
  # Message typo fixed: "Unepxected" -> "Unexpected".
  "unexpected-char-in-frameset" =>
    _("Unexpected characters in the frameset phase. Characters ignored."),
  "unexpected-start-tag-in-frameset" =>
    _("Unexpected start tag token (%(name))" +
      " in the frameset phase. Ignored."),
  "unexpected-frameset-in-frameset-innerhtml" =>
    _("Unexpected end tag token (frameset) " +
      "in the frameset phase (innerHTML)."),
  "unexpected-end-tag-in-frameset" =>
    _("Unexpected end tag token (%(name))" +
      " in the frameset phase. Ignored."),
  "unexpected-char-after-frameset" =>
    _("Unexpected non-space characters in the " +
      "after frameset phase. Ignored."),
  "unexpected-start-tag-after-frameset" =>
    _("Unexpected start tag (%(name))" +
      " in the after frameset phase. Ignored."),
  "unexpected-end-tag-after-frameset" =>
    _("Unexpected end tag (%(name))" +
      " in the after frameset phase. Ignored."),
  "expected-eof-but-got-char" =>
    _("Unexpected non-space characters. Expected end of file."),
  "expected-eof-but-got-start-tag" =>
    _("Unexpected start tag (%(name))" +
      ". Expected end of file."),
  "expected-eof-but-got-end-tag" =>
    _("Unexpected end tag (%(name))" +
      ". Expected end of file."),
  "unexpected-end-table-in-caption" =>
    _("Unexpected end table tag in caption. Generates implied end caption."),
  "end-html-in-innerhtml" => _("Unexpected html end tag in inner html mode."),
  "expected-self-closing-tag" => _("Expected a > after the /."),
  "self-closing-end-tag" => _("Self closing end tag."),
  "eof-in-table" => _("Unexpected end of file. Expected table content."),
  "html-in-foreign-content" => _("HTML start tag \"%(name)\" in a foreign namespace context."),
  "unexpected-start-tag-in-table" => _("Unexpected %(name). Expected table content."),
}
|
1060
|
+
|
1061
|
+
end
|