iudex-html 1.0.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.rdoc +2 -0
- data/Manifest.txt +24 -0
- data/README.rdoc +25 -0
- data/Rakefile +53 -0
- data/build/HTML.java.erb +91 -0
- data/build/attributes +82 -0
- data/build/java_generate.rb +139 -0
- data/build/tags +130 -0
- data/lib/iudex-html.rb +56 -0
- data/lib/iudex-html/base.rb +21 -0
- data/lib/iudex-html/factory_helper.rb +95 -0
- data/lib/iudex-html/iudex-html-1.0.0.jar +0 -0
- data/pom.xml +51 -0
- data/test/html_test_helper.rb +100 -0
- data/test/setup.rb +38 -0
- data/test/test_characters_normalizer.rb +81 -0
- data/test/test_extract_filter.rb +165 -0
- data/test/test_factory_helper.rb +51 -0
- data/test/test_html_parser.rb +128 -0
- data/test/test_other_filters.rb +51 -0
- data/test/test_other_tree_filters.rb +124 -0
- data/test/test_parse_filter.rb +72 -0
- data/test/test_tree_walker.rb +94 -0
- data/test/test_word_counters.rb +96 -0
- metadata +162 -0
data/History.rdoc
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
History.rdoc
|
2
|
+
Manifest.txt
|
3
|
+
README.rdoc
|
4
|
+
Rakefile
|
5
|
+
pom.xml
|
6
|
+
build/HTML.java.erb
|
7
|
+
build/attributes
|
8
|
+
build/java_generate.rb
|
9
|
+
build/tags
|
10
|
+
lib/iudex-html/base.rb
|
11
|
+
lib/iudex-html.rb
|
12
|
+
lib/iudex-html/factory_helper.rb
|
13
|
+
test/html_test_helper.rb
|
14
|
+
test/setup.rb
|
15
|
+
test/test_characters_normalizer.rb
|
16
|
+
test/test_extract_filter.rb
|
17
|
+
test/test_factory_helper.rb
|
18
|
+
test/test_html_parser.rb
|
19
|
+
test/test_other_filters.rb
|
20
|
+
test/test_other_tree_filters.rb
|
21
|
+
test/test_parse_filter.rb
|
22
|
+
test/test_tree_walker.rb
|
23
|
+
test/test_word_counters.rb
|
24
|
+
lib/iudex-html/iudex-html-1.0.0.jar
|
data/README.rdoc
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
= iudex-html
|
2
|
+
|
3
|
+
* http://github.com/dekellum/iudex
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Iudex is a general purpose web crawler and feed processor in
|
8
|
+
ruby/java. The iudex-html gem contains filters for HTML parsing,
|
9
|
+
filtering, extracting text and links.
|
10
|
+
|
11
|
+
== License
|
12
|
+
|
13
|
+
Copyright (c) 2010-2011 David Kellum
|
14
|
+
|
15
|
+
Licensed under the Apache License, Version 2.0 (the "License"); you
|
16
|
+
may not use this file except in compliance with the License. You
|
17
|
+
may obtain a copy of the License at:
|
18
|
+
|
19
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
20
|
+
|
21
|
+
Unless required by applicable law or agreed to in writing, software
|
22
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
23
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
24
|
+
implied. See the License for the specific language governing
|
25
|
+
permissions and limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# -*- ruby -*-
#
# Build script for the iudex-html gem. Besides the standard TarPit tasks it
# wires in generation of HTML.java from the tables under build/ and a few
# release sanity checks on pom.xml and History.rdoc.

$LOAD_PATH << './lib'
require 'iudex-html/base'

require 'rubygems'
gem     'rjack-tarpit', '~> 1.2'
require 'rjack-tarpit'

# Java source produced by build/java_generate.rb (never hand-edited).
html_java = 'src/main/java/iudex/html/HTML.java'

t = RJack::TarPit.new( 'iudex-html',
                       Iudex::HTML::VERSION,
                       :no_assembly, :java_platform )

t.specify do |h|
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
  h.extra_deps += [ [ 'iudex-core',        '~> 1.0.0'  ],
                    [ 'rjack-nekohtml',    '~> 1.9.14' ],
                    [ 'gravitext-xmlprod', '~> 1.4.0'  ] ]

  h.testlib = :minitest
  h.extra_dev_deps += [ [ 'minitest',      '>= 1.7.1', '< 2.1' ],
                        [ 'rjack-logback', '~> 1.0' ] ]
end

file 'Manifest.txt' => [ 'pom.xml' ]

# The first <version> line of pom.xml must carry the gem version.
task :check_pom_version do
  t.test_line_match( 'pom.xml', /<version>/, /#{t.version}/ )
end

# The first "==" heading of History.rdoc must name the gem version.
task :check_history_version do
  t.test_line_match( 'History.rdoc', /^==/, / #{t.version} / )
end

# The first "==" heading of History.rdoc must end with a "(date)".
task :check_history_date do
  t.test_line_match( 'History.rdoc', /^==/, /\([0-9\-]+\)$/ )
end

task :gem  => [ :check_pom_version, :check_history_version ]
task :tag  => [ :check_pom_version, :check_history_version, :check_history_date ]
task :push => [ :check_history_date ]

file 'target/.tarpit' => [ html_java ]

file html_java => FileList.new( "build/*" ) do
  # Resolve the generator relative to this Rakefile: the current directory
  # is not on $LOAD_PATH in Ruby >= 1.9.2, so a bare 'build/java_generate'
  # would raise LoadError there.
  require File.expand_path( '../build/java_generate', __FILE__ )
  puts "Generating HTML.java"
  JavaGenerator.new.run
end

task :clean do
  rm_f html_java
end

t.define_tasks
|
data/build/HTML.java.erb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2010-2011 David Kellum
|
3
|
+
*
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
* may not use this file except in compliance with the License. You may
|
6
|
+
* obtain a copy of the License at
|
7
|
+
*
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
*
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
* implied. See the License for the specific language governing
|
14
|
+
* permissions and limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
package iudex.html;
|
18
|
+
|
19
|
+
import java.util.Arrays;
|
20
|
+
import java.util.Collections;
|
21
|
+
import java.util.List;
|
22
|
+
import java.util.HashMap;
|
23
|
+
import java.util.Map;
|
24
|
+
|
25
|
+
import iudex.html.HTMLTag.Flag;
|
26
|
+
import static iudex.html.HTMLTag.Flag.*;
|
27
|
+
|
28
|
+
import com.gravitext.xml.producer.Namespace;
|
29
|
+
import com.gravitext.xml.producer.Attribute;
|
30
|
+
|
31
|
+
/**
 * HTML Tag constants
 * This class is GENERATED by java_generate.rb.
 */
public class HTML
{
    public static final Namespace NS_XHTML =
        new Namespace( Namespace.DEFAULT, "http://www.w3.org/1999/xhtml" );

    public static final Map<String,HTMLTag> TAGS =
        new HashMap<String,HTMLTag>( 127 );

    public static final Map<String,Attribute> ATTRIBUTES =
        new HashMap<String,Attribute>( 59 );

    public static final List<Attribute> EMPTY_ATTS = Collections.emptyList();
% # One ATTR_* constant per row of the attributes table.
% attributes.each do |a|
%   if a.desc

    /**
     * Attribute <%= a.name %>: <%= a.desc %>
     */
%   end
    public static final Attribute <%= awidth( 'ATTR_' + const( a.name.upcase ), 5 ) %> = attr( <%= awidth( '"' + a.name + '"', 2 ) %> );
% end

% # One HTMLTag constant per row of the tags table; the arguments are the
% # tag name, its basic attribute list and any flags.
% tags.each do |tag|
%   targs = [ '"' + tag.name + '"' ]
%   basic_atts = tag.basic_atts.map { |a| 'ATTR_' + const( a.name.upcase ) }
%   targs << if basic_atts.empty?
%              "EMPTY_ATTS"
%            else
%              "Arrays.asList( #{ basic_atts.join( ', ' ) } )"
%            end
%   targs += tag.flags
%   if tag.desc
%     # Angle brackets are escaped: Javadoc text is HTML, so a raw <name>
%     # would be read as markup instead of being displayed.
    /**
     * Tag &lt;<%= tag.name %>&gt;: <%= tag.desc %>
     */
%   end
    public static final HTMLTag <%= twidth( tag.name.upcase ) %> =
        tag( <%= targs.join( ', ' ) %> );

% end

    /**
     * Create a tag in the XHTML namespace and register it in TAGS by name.
     */
    private static HTMLTag tag( String name,
                                List<Attribute> basicAtts,
                                Flag...flags )
    {
        HTMLTag t = new HTMLTag( name, NS_XHTML, basicAtts, flags );
        TAGS.put( t.name(), t );
        return t;
    }

    /**
     * Create an attribute in the XHTML namespace and register it in
     * ATTRIBUTES by name.
     */
    private static Attribute attr( String name )
    {
        Attribute a = new Attribute( name, NS_XHTML );
        ATTRIBUTES.put( a.name(), a );
        return a;
    }
}
|
data/build/attributes
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
# HTML Attributes
|
2
|
+
#
|
3
|
+
# Copyright (c) 2010-2011 David Kellum
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
6
|
+
# may not use this file except in compliance with the License. You may
|
7
|
+
# obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
14
|
+
# implied. See the License for the specific language governing
|
15
|
+
# permissions and limitations under the License.
|
16
|
+
#
|
17
|
+
# Format:
|
18
|
+
# (CSV like) columns: name, tags, description
|
19
|
+
# Tags marked with asterisk (*): attribute is for style purposes only.
|
20
|
+
#
|
21
|
+
# Sources
|
22
|
+
# http://www.w3.org/TR/xhtml11/
|
23
|
+
# http://www.w3.org/TR/html4/
|
24
|
+
# http://www.w3schools.com/tags/ref_standardattributes.asp
|
25
|
+
# http://xhtml.com
|
26
|
+
|
27
|
+
CORE :: ALL except: base head html meta param script style title
|
28
|
+
class ,*CORE
|
29
|
+
id ,*CORE
|
30
|
+
style ,*CORE
|
31
|
+
title ,CORE, extra title
|
32
|
+
|
33
|
+
LANG :: ALL except: base br frame frameset hr iframe param
|
34
|
+
dir ,LANG, Text direction; ltr or rtl
|
35
|
+
lang ,LANG, language_code; also xml:lang
|
36
|
+
|
37
|
+
# Meta tag attributes
|
38
|
+
http-equiv ,meta, HTTP Header name
|
39
|
+
content ,meta, text
|
40
|
+
scheme ,meta, format URI
|
41
|
+
|
42
|
+
# Anchor and link attributes
|
43
|
+
charset ,a link, char_encoding of link
|
44
|
+
coords ,*a, coordinates; i.e. image map
|
45
|
+
hreflang ,link, language_code of referent
|
46
|
+
href ,a base link, URL
|
47
|
+
media ,link
|
48
|
+
name ,a, section_name anchor
|
49
|
+
rel ,a link
|
50
|
+
rev ,a link
|
51
|
+
shape ,*a
|
52
|
+
target ,*a *base *link
|
53
|
+
type ,link
|
54
|
+
|
55
|
+
# Image and some frame attributes
|
56
|
+
src ,frame img
|
57
|
+
alt ,img
|
58
|
+
height ,img *tr *th *td *iframe *object
|
59
|
+
width ,img *table *tr *th *td *iframe *object
|
60
|
+
|
61
|
+
# Table specific attributes
|
62
|
+
abbr ,tr th
|
63
|
+
align ,table tr td th iframe object
|
64
|
+
axis ,tr th
|
65
|
+
bgcolor ,*table *tr *td *th
|
66
|
+
border ,*table
|
67
|
+
cellpadding ,*table
|
68
|
+
cellspacing ,*table
|
69
|
+
char ,tr td th
|
70
|
+
charoff ,tr td th
|
71
|
+
colspan ,tr td th
|
72
|
+
frame ,*table
|
73
|
+
headers ,tr td
|
74
|
+
nowrap ,*tr *td *th
|
75
|
+
rowspan ,tr td th
|
76
|
+
rules ,*table
|
77
|
+
scope ,tr td th
|
78
|
+
summary ,table
|
79
|
+
valign ,*tr *td
|
80
|
+
|
81
|
+
# Purposefully omitted (will be dropped on parse)
|
82
|
+
# -- The event attributes on*, onmouse*, onkey*
|
@@ -0,0 +1,139 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
# -*- ruby -*-
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2010-2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You may
|
9
|
+
# obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require 'erb'
|
21
|
+
require 'ostruct'
|
22
|
+
|
23
|
+
# Generator for HTML.java tags/attribute input configuration
|
24
|
+
# Reads the 'tags' and 'attributes' tables and the 'HTML.java.erb' template
# from BASEDIR and writes the rendered Java source file.
#
# The template is evaluated with this object's binding, so it can use the
# +tags+ and +attributes+ readers and the +twidth+, +awidth+ and +const+
# helpers directly.
class JavaGenerator

  attr_reader :tags, :attributes

  BASEDIR = File.dirname( __FILE__ )

  JAVA_OUT = File.join( BASEDIR, '..', 'src',
                        'main', 'java', 'iudex', 'html', 'HTML.java' )

  # Single letter codes of the tags table that become flag names. Any other
  # code in that column (E, S, T, F) has no entry here and is dropped.
  FLAGS = {
    'D' => 'DEPRECATED',
    'I' => 'INLINE',
    'M' => 'METADATA',
    'B' => 'BANNED' }

  # Parse both tables, attach attributes to tags and write java_file.
  def run( java_file = JAVA_OUT )
    parse_tags
    parse_attributes
    map_basic_attributes
    generate_java( java_file )
  end

  # Load BASEDIR/tags into @tags as structs with :name, :flags and :desc.
  # Rows are "name, codes, description"; comment and blank lines are
  # skipped and any other line raises a RuntimeError.
  # Returns [ @tags ].
  def parse_tags()
    @tags = []

    File.open( File.join( BASEDIR, 'tags' ), 'r' ) do |fin|
      fin.each do |line|
        case line
        when /^\s*#/, /^\s*$/
          # ignore comment, empty lines
        when /^\s*[^\s,]+\s*,[^,]*,[^,]*$/
          name, codes, desc = split_columns( line )
          flags = codes.split( ' ' ).map { |f| FLAGS[f] }.compact
          @tags << OpenStruct.new( :name  => name,
                                   :flags => flags,
                                   :desc  => desc )
        else
          raise "Parse ERROR: line [#{line}]"
        end
      end
    end

    @tag_max_len = @tags.map { |t| t.name.length }.max
    [ @tags ]
  end

  # Load BASEDIR/attributes into @attributes as structs with :name,
  # :basic_tags and :desc. Must run after parse_tags, because
  # "NAME :: ALL except: ..." rows are expanded against @tags.
  # Tags prefixed with an asterisk are left out of :basic_tags.
  # Returns [ @attributes ].
  def parse_attributes()
    @attributes = []
    tagsets = {}

    File.open( File.join( BASEDIR, 'attributes' ), 'r' ) do |fin|
      fin.each do |line|
        case line
        when /^\s*#/, /^\s*$/
          # ignore comment, empty lines
        when /^\s*([A-Z]+)\s*::\s*ALL\s+except:(.*)$/
          sname  = $1
          except = $2.split( ' ' )
          tagsets[sname] =
            @tags.map { |t| t.name }.reject { |n| except.include?( n ) }
        when /^\s*[^\s,]+\s*,/
          name, tag_col, desc = split_columns( line )

          btags = tag_col.split( ' ' ).reject { |t| t =~ /^\*/ }
          # A named tag set stands for all of its member tags.
          btags = btags.map { |t| tagsets[ t ] || t }.flatten

          @attributes << OpenStruct.new( :name       => name,
                                         :basic_tags => btags,
                                         :desc       => desc )
        else
          raise "Parse ERROR: line [#{line}]"
        end
      end
    end

    @attr_max_len = @attributes.map { |a| a.name.length }.max
    [ @attributes ]
  end

  # Give every tag a :basic_atts list holding the attributes whose
  # :basic_tags name it, in attributes table order.
  def map_basic_attributes()
    @tags.each do |tag|
      tag.basic_atts =
        @attributes.select { |attr| attr.basic_tags.include?( tag.name ) }
    end
  end

  # val padded with spaces to the longest tag name length plus extra.
  def twidth( val, extra = 0 )
    pad( val, @tag_max_len + extra )
  end

  # val padded with spaces to the longest attribute name length plus extra.
  def awidth( val, extra = 0 )
    pad( val, @attr_max_len + extra )
  end

  # val with dashes turned into underscores, e.g. for HTTP-EQUIV.
  def const( val )
    val.tr( '-', '_' )
  end

  # Render BASEDIR/HTML.java.erb with this object's binding to java_file.
  def generate_java( java_file )
    erb_file = File.join( BASEDIR, 'HTML.java.erb' )
    template = new_template( IO.read( erb_file ) )

    File.open( java_file, 'w' ) do |fout|
      fout << template.result( binding )
    end
  end

  private

  # Split a table row into [ first, second, rest ] stripped columns.
  # Columns keep their position when one is empty; the second is '' and
  # the third nil when missing or blank.
  def split_columns( line )
    first, second, rest = line.split( ',', 3 ).map { |c| c.strip }
    rest = nil if rest && rest.empty?
    [ first, second || '', rest ]
  end

  # Right-pad val with spaces up to width; longer values are unchanged.
  def pad( val, width )
    val + ( ' ' * [ width - val.length, 0 ].max )
  end

  # ERB template in '%' trim mode (lines starting with % are ruby code).
  # Uses the keyword form when this ERB offers it, otherwise the legacy
  # positional (source, safe_level, trim_mode) form.
  def new_template( source )
    init = ERB.instance_method( :initialize )
    if init.respond_to?( :parameters ) &&
       init.parameters.include?( [ :key, :trim_mode ] )
      ERB.new( source, :trim_mode => '%' )
    else
      ERB.new( source, nil, '%' )
    end
  end

end
|
136
|
+
|
137
|
+
if $0 == __FILE__
|
138
|
+
JavaGenerator.new.run( *ARGV )
|
139
|
+
end
|
data/build/tags
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
# HTML Tags
|
2
|
+
#
|
3
|
+
# Copyright (c) 2010-2011 David Kellum
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
6
|
+
# may not use this file except in compliance with the License. You may
|
7
|
+
# obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
14
|
+
# implied. See the License for the specific language governing
|
15
|
+
# permissions and limitations under the License.
|
16
|
+
#
|
17
|
+
# Sources
|
18
|
+
# http://www.w3.org/TR/xhtml11/
|
19
|
+
# http://www.w3.org/TR/html4/
|
20
|
+
# http://www.w3schools.com/tags/default.asp
|
21
|
+
# http://xhtml.com/
|
22
|
+
#
|
23
|
+
# Codes:
|
24
|
+
# E :: Empty Tag
|
25
|
+
# S :: In Strict HTML 4.01/XHTML 1.0
|
26
|
+
# T :: In Transitional HTML 4.01/XHTML 1.0
|
27
|
+
# F :: In frameset annex
|
28
|
+
# D :: Deprecated
|
29
|
+
# I :: Inline elements (Note <br/> is not labeled inline.)
|
30
|
+
# M :: Metadata elements (content not visible text), i.e. head
|
31
|
+
# B :: Banned/blacklisted elements from which text should not be extracted.
|
32
|
+
|
33
|
+
a , S T F I , anchor
|
34
|
+
abbr , S T F I , abbreviation
|
35
|
+
acronym , S T F I , acronym
|
36
|
+
address , S T F , contact information for the author or owner
|
37
|
+
applet , T F D , embedded applet
|
38
|
+
area ,E S T F , area inside an image-map
|
39
|
+
b , S T F I , bold text
|
40
|
+
base ,E S T F M , default address or a default target for all links on a page
|
41
|
+
basefont ,E T F D I M , default font; color; or size for the text in a page
|
42
|
+
bdo , S T F I , the text direction
|
43
|
+
big , S T F I , big text
|
44
|
+
blockquote , S T F , long quotation
|
45
|
+
body , S T F , the document's body
|
46
|
+
br ,E S T F , single line break
|
47
|
+
button , S T F I B, push button
|
48
|
+
caption , S T F , table caption
|
49
|
+
center , T F D , centered text
|
50
|
+
cite , S T F I , citation
|
51
|
+
code , S T F I , computer code text
|
52
|
+
col ,E S T F , attribute values for one or more columns in a table
|
53
|
+
colgroup , S T F , group of columns in a table for formatting
|
54
|
+
dd , S T F , description of a term in a definition list
|
55
|
+
del , S T F I , deleted text
|
56
|
+
dfn , S T F I , definition term
|
57
|
+
dir , T F D , directory list
|
58
|
+
div , S T F , section in a document
|
59
|
+
dl , S T F , definition list
|
60
|
+
dt , S T F , term (an item) in a definition list
|
61
|
+
em , S T F I , emphasized text
|
62
|
+
fieldset , S T F B, border around elements in a form
|
63
|
+
font , T F D I , font; color; or size for text
|
64
|
+
form , S T F , form for user input
|
65
|
+
frame ,E F B, window (a frame) in a frameset
|
66
|
+
frameset , F B, set of frames
|
67
|
+
h1 , S T F , heading level 1
|
68
|
+
h2 , S T F , heading level 2
|
69
|
+
h3 , S T F , heading level 3
|
70
|
+
h4 , S T F , heading level 4
|
71
|
+
h5 , S T F , heading level 5
|
72
|
+
h6 , S T F , heading level 6
|
73
|
+
head , S T F M , information about the document
|
74
|
+
hr ,E S T F , horizontal line
|
75
|
+
html , S T F , document
|
76
|
+
i , S T F I , italic text
|
77
|
+
iframe , T F , inline frame
|
78
|
+
img ,E S T F I , image
|
79
|
+
input ,E S T F I B, input control
|
80
|
+
ins , S T F I , inserted text
|
81
|
+
isindex , T F D , searchable index related to a document
|
82
|
+
kbd , S T F I , keyboard text
|
83
|
+
label , S T F I B, label for an input element
|
84
|
+
legend , S T F B, caption for a fieldset element
|
85
|
+
li , S T F , list item
|
86
|
+
link ,E S T F M , the relationship between a document and an external resource
|
87
|
+
map , S T F I , image-map
|
88
|
+
menu , T F D , menu list
|
89
|
+
meta ,E S T F M , metadata
|
90
|
+
noframes , T F B, alternate content where frames not supported
|
91
|
+
noscript , S T F B, alternate content script not supported
|
92
|
+
object , S T F I B, embedded object
|
93
|
+
ol , S T F , ordered list
|
94
|
+
optgroup , S T F B, group of related options in a select list
|
95
|
+
option , S T F B, option in a select list
|
96
|
+
p , S T F , paragraph
|
97
|
+
param ,E S T F , parameter for an object
|
98
|
+
pre , S T F , preformatted text
|
99
|
+
q , S T F I , short quotation
|
100
|
+
rb , , ruby base text
|
101
|
+
rbc , , ruby base container (complex)
|
102
|
+
rp , , ruby simple text container
|
103
|
+
rt , , ruby annotation text
|
104
|
+
rtc , , ruby text container (complex)
|
105
|
+
ruby , I , ruby pronunciation aid
|
106
|
+
s , T F D I , strikethrough text
|
107
|
+
samp , S T F I , sample computer code
|
108
|
+
script , S T F I B, client-side script
|
109
|
+
select , S T F I B, select list (drop-down list)
|
110
|
+
small , S T F I , small text
|
111
|
+
span , S T F I , section in a document
|
112
|
+
strike , T F D I , strikethrough text
|
113
|
+
strong , S T F I , strong text
|
114
|
+
style , S T F B, style information for a document
|
115
|
+
sub , S T F I , subscripted text
|
116
|
+
sup , S T F I , superscripted text
|
117
|
+
table , S T F , table
|
118
|
+
tbody , S T F , Groups the body content in a table
|
119
|
+
td , S T F , cell in a table
|
120
|
+
textarea , S T F I B, multi-line text input control
|
121
|
+
tfoot , S T F , Groups the footer content in a table
|
122
|
+
th , S T F , header cell in a table
|
123
|
+
thead , S T F , Groups the header content in a table
|
124
|
+
title , S T F M , the title of a document
|
125
|
+
tr , S T F , row in a table
|
126
|
+
tt , S T F I , teletype text
|
127
|
+
u , T F D I , underlined text
|
128
|
+
ul , S T F , unordered list
|
129
|
+
var , S T F I , variable part of a text
|
130
|
+
xmp , D , preformatted text
|