iudex-html 1.0.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/History.rdoc +2 -0
- data/Manifest.txt +24 -0
- data/README.rdoc +25 -0
- data/Rakefile +53 -0
- data/build/HTML.java.erb +91 -0
- data/build/attributes +82 -0
- data/build/java_generate.rb +139 -0
- data/build/tags +130 -0
- data/lib/iudex-html.rb +56 -0
- data/lib/iudex-html/base.rb +21 -0
- data/lib/iudex-html/factory_helper.rb +95 -0
- data/lib/iudex-html/iudex-html-1.0.0.jar +0 -0
- data/pom.xml +51 -0
- data/test/html_test_helper.rb +100 -0
- data/test/setup.rb +38 -0
- data/test/test_characters_normalizer.rb +81 -0
- data/test/test_extract_filter.rb +165 -0
- data/test/test_factory_helper.rb +51 -0
- data/test/test_html_parser.rb +128 -0
- data/test/test_other_filters.rb +51 -0
- data/test/test_other_tree_filters.rb +124 -0
- data/test/test_parse_filter.rb +72 -0
- data/test/test_tree_walker.rb +94 -0
- data/test/test_word_counters.rb +96 -0
- metadata +162 -0
data/History.rdoc
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
History.rdoc
|
2
|
+
Manifest.txt
|
3
|
+
README.rdoc
|
4
|
+
Rakefile
|
5
|
+
pom.xml
|
6
|
+
build/HTML.java.erb
|
7
|
+
build/attributes
|
8
|
+
build/java_generate.rb
|
9
|
+
build/tags
|
10
|
+
lib/iudex-html/base.rb
|
11
|
+
lib/iudex-html.rb
|
12
|
+
lib/iudex-html/factory_helper.rb
|
13
|
+
test/html_test_helper.rb
|
14
|
+
test/setup.rb
|
15
|
+
test/test_characters_normalizer.rb
|
16
|
+
test/test_extract_filter.rb
|
17
|
+
test/test_factory_helper.rb
|
18
|
+
test/test_html_parser.rb
|
19
|
+
test/test_other_filters.rb
|
20
|
+
test/test_other_tree_filters.rb
|
21
|
+
test/test_parse_filter.rb
|
22
|
+
test/test_tree_walker.rb
|
23
|
+
test/test_word_counters.rb
|
24
|
+
lib/iudex-html/iudex-html-1.0.0.jar
|
data/README.rdoc
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
= iudex-html
|
2
|
+
|
3
|
+
* http://github.com/dekellum/iudex
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Iudex is a general purpose web crawler and feed processor in
|
8
|
+
ruby/java. The iudex-html gem contains filters for HTML parsing,
|
9
|
+
filtering, extracting text and links.
|
10
|
+
|
11
|
+
== License
|
12
|
+
|
13
|
+
Copyright (c) 2010-2011 David Kellum
|
14
|
+
|
15
|
+
Licensed under the Apache License, Version 2.0 (the "License"); you
|
16
|
+
may not use this file except in compliance with the License. You
|
17
|
+
may obtain a copy of the License at:
|
18
|
+
|
19
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
20
|
+
|
21
|
+
Unless required by applicable law or agreed to in writing, software
|
22
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
23
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
24
|
+
implied. See the License for the specific language governing
|
25
|
+
permissions and limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# -*- ruby -*-

# Build script for the iudex-html gem: declares the gem through
# RJack::TarPit, adds release sanity checks, and (re)generates
# HTML.java from the inputs under build/.

$LOAD_PATH << './lib'
require 'iudex-html/base'

require 'rubygems'
gem     'rjack-tarpit', '~> 1.2'
require 'rjack-tarpit'

t = RJack::TarPit.new( 'iudex-html',
                       Iudex::HTML::VERSION,
                       :no_assembly, :java_platform )

t.specify do |h|
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
  h.extra_deps += [ [ 'iudex-core',        '~> 1.0.0' ],
                    [ 'rjack-nekohtml',    '~> 1.9.14' ],
                    [ 'gravitext-xmlprod', '~> 1.4.0' ] ]

  h.testlib = :minitest
  h.extra_dev_deps += [ [ 'minitest',      '>= 1.7.1', '< 2.1' ],
                        [ 'rjack-logback', '~> 1.0' ] ]
end

file 'Manifest.txt' => [ 'pom.xml' ]

# Release sanity checks. Each one matches lines of a file against the
# given patterns via TarPit#test_line_match (exact failure behavior is
# defined by rjack-tarpit -- not visible here).
task :check_pom_version do
  t.test_line_match( 'pom.xml', /<version>/, /#{t.version}/ )
end
task :check_history_version do
  t.test_line_match( 'History.rdoc', /^==/, / #{t.version} / )
end
task :check_history_date do
  t.test_line_match( 'History.rdoc', /^==/, /\([0-9\-]+\)$/ )
end

task :gem  => [ :check_pom_version, :check_history_version ]
task :tag  => [ :check_pom_version, :check_history_version, :check_history_date ]
task :push => [ :check_history_date ]

file 'target/.tarpit' => [ 'src/main/java/iudex/html/HTML.java' ]

# HTML.java is generated; rebuild it whenever anything under build/
# changes.
file 'src/main/java/iudex/html/HTML.java' => FileList.new( "build/*" ) do
  # Loaded lazily, and resolved relative to this Rakefile rather than
  # through $LOAD_PATH: the current directory is not on the load path
  # in Ruby 1.9.2 and later, so a bare 'build/java_generate' would not
  # be found there.
  require File.expand_path( 'build/java_generate', File.dirname( __FILE__ ) )
  puts "Generating HTML.java"
  JavaGenerator.new.run
end

task :clean do
  rm_f 'src/main/java/iudex/html/HTML.java'
end

t.define_tasks
|
data/build/HTML.java.erb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2010-2011 David Kellum
|
3
|
+
*
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
* may not use this file except in compliance with the License. You may
|
6
|
+
* obtain a copy of the License at
|
7
|
+
*
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
*
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
* implied. See the License for the specific language governing
|
14
|
+
* permissions and limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
package iudex.html;
|
18
|
+
|
19
|
+
import java.util.Arrays;
|
20
|
+
import java.util.Collections;
|
21
|
+
import java.util.List;
|
22
|
+
import java.util.HashMap;
|
23
|
+
import java.util.Map;
|
24
|
+
|
25
|
+
import iudex.html.HTMLTag.Flag;
|
26
|
+
import static iudex.html.HTMLTag.Flag.*;
|
27
|
+
|
28
|
+
import com.gravitext.xml.producer.Namespace;
|
29
|
+
import com.gravitext.xml.producer.Attribute;
|
30
|
+
|
31
|
+
/**
|
32
|
+
* HTML Tag constants
|
33
|
+
* This class is GENERATED by java_generate.rb.
|
34
|
+
*/
|
35
|
+
public class HTML
|
36
|
+
{
|
37
|
+
public static final Namespace NS_XHTML =
|
38
|
+
new Namespace( Namespace.DEFAULT, "http://www.w3.org/1999/xhtml" );
|
39
|
+
|
40
|
+
public static final Map<String,HTMLTag> TAGS =
|
41
|
+
new HashMap<String,HTMLTag>( 127 );
|
42
|
+
|
43
|
+
public static final Map<String,Attribute> ATTRIBUTES =
|
44
|
+
new HashMap<String,Attribute>( 59 );
|
45
|
+
|
46
|
+
public static final List<Attribute> EMPTY_ATTS = Collections.emptyList();
|
47
|
+
% attributes.each do |a|
|
48
|
+
% if a.desc
|
49
|
+
|
50
|
+
/**
|
51
|
+
* Attribute <%= a.name %>: <%= a.desc %>
|
52
|
+
*/
|
53
|
+
% end
|
54
|
+
public static final Attribute <%= awidth( 'ATTR_' + const( a.name.upcase ), 5 ) %> = attr( <%= awidth( '"' + a.name + '"', 2 ) %> );
|
55
|
+
% end
|
56
|
+
|
57
|
+
% tags.each do |tag|
|
58
|
+
% targs = [ '"' + tag.name + '"' ]
|
59
|
+
% basic_atts = tag.basic_atts.map { |a| 'ATTR_' + const( a.name.upcase ) }
|
60
|
+
% targs << if basic_atts.empty?
|
61
|
+
% "EMPTY_ATTS"
|
62
|
+
% else
|
63
|
+
% "Arrays.asList( #{ basic_atts.join( ', ' ) } )"
|
64
|
+
% end
|
65
|
+
% targs += tag.flags
|
66
|
+
% if tag.desc
|
67
|
+
/**
|
68
|
+
* Tag <<%= tag.name %>>: <%= tag.desc %>
|
69
|
+
*/
|
70
|
+
% end
|
71
|
+
public static final HTMLTag <%= twidth( tag.name.upcase ) %> =
|
72
|
+
tag( <%= targs.join( ', ' ) %> );
|
73
|
+
|
74
|
+
% end
|
75
|
+
|
76
|
+
private static HTMLTag tag( String name,
|
77
|
+
List<Attribute> basicAtts,
|
78
|
+
Flag...flags )
|
79
|
+
{
|
80
|
+
HTMLTag t = new HTMLTag( name, NS_XHTML, basicAtts, flags );
|
81
|
+
TAGS.put( t.name(), t );
|
82
|
+
return t;
|
83
|
+
}
|
84
|
+
|
85
|
+
private static Attribute attr( String name )
|
86
|
+
{
|
87
|
+
Attribute a = new Attribute( name, NS_XHTML );
|
88
|
+
ATTRIBUTES.put( a.name(), a );
|
89
|
+
return a;
|
90
|
+
}
|
91
|
+
}
|
data/build/attributes
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
# HTML Attributes
|
2
|
+
#
|
3
|
+
# Copyright (c) 2010-2011 David Kellum
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
6
|
+
# may not use this file except in compliance with the License. You may
|
7
|
+
# obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
14
|
+
# implied. See the License for the specific language governing
|
15
|
+
# permissions and limitations under the License.
|
16
|
+
#
|
17
|
+
# Format:
|
18
|
+
# (CSV like) columns: name, tags, description
|
19
|
+
# Tags marked with asterisk (*): attribute is for style purposes only.
|
20
|
+
#
|
21
|
+
# Sources
|
22
|
+
# http://www.w3.org/TR/xhtml11/
|
23
|
+
# http://www.w3.org/TR/html4/
|
24
|
+
# http://www.w3schools.com/tags/ref_standardattributes.asp
|
25
|
+
# http://xhtml.com
|
26
|
+
|
27
|
+
CORE :: ALL except: base head html meta param script style title
|
28
|
+
class ,*CORE
|
29
|
+
id ,*CORE
|
30
|
+
style ,*CORE
|
31
|
+
title ,CORE, extra title
|
32
|
+
|
33
|
+
LANG :: ALL except: base br frame frameset hr iframe param
|
34
|
+
dir ,LANG, Text direction; ltr or rtl
|
35
|
+
lang ,LANG, language_code; also xml:lang
|
36
|
+
|
37
|
+
# Meta tag attributes
|
38
|
+
http-equiv ,meta, HTTP Header name
|
39
|
+
content ,meta, text
|
40
|
+
scheme ,meta, format URI
|
41
|
+
|
42
|
+
# Anchor and link attributes
|
43
|
+
charset ,a link, char_encoding of link
|
44
|
+
coords ,*a, coordinates; i.e. image map
|
45
|
+
hreflang ,link, language_code of referent
|
46
|
+
href ,a base link, URL
|
47
|
+
media ,link
|
48
|
+
name ,a, section_name anchor
|
49
|
+
rel ,a link
|
50
|
+
rev ,a link
|
51
|
+
shape ,*a
|
52
|
+
target ,*a *base *link
|
53
|
+
type ,link
|
54
|
+
|
55
|
+
# Image and some frame attributes
|
56
|
+
src ,frame img
|
57
|
+
alt ,img
|
58
|
+
height ,img *tr *th *td *iframe *object
|
59
|
+
width ,img *table *tr *th *td *iframe *object
|
60
|
+
|
61
|
+
# Table specific attributes
|
62
|
+
abbr ,tr th
|
63
|
+
align ,table tr td th iframe object
|
64
|
+
axis ,tr th
|
65
|
+
bgcolor ,*table *tr *td *th
|
66
|
+
border ,*table
|
67
|
+
cellpadding ,*table
|
68
|
+
cellspacing ,*table
|
69
|
+
char ,tr td th
|
70
|
+
charoff ,tr td th
|
71
|
+
colspan ,tr td th
|
72
|
+
frame ,*table
|
73
|
+
headers ,tr td
|
74
|
+
nowrap ,*tr *td *th
|
75
|
+
rowspan ,tr td th
|
76
|
+
rules ,*table
|
77
|
+
scope ,tr td th
|
78
|
+
summary ,table
|
79
|
+
valign ,*tr *td
|
80
|
+
|
81
|
+
# Purposefully omitted (will be dropped on parse)
|
82
|
+
# -- The event attributes on*, onmouse*, onkey*
|
@@ -0,0 +1,139 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
# -*- ruby -*-
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2010-2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You may
|
9
|
+
# obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require 'erb'
|
21
|
+
require 'ostruct'
|
22
|
+
|
23
|
+
# Generator for HTML.java tags/attribute input configuration
|
24
|
+
# Generator for HTML.java, driven by the 'tags' and 'attributes'
# configuration files and the HTML.java.erb template found in the same
# directory as this script.
class JavaGenerator

  # Arrays of OpenStruct records, populated by parse_tags and
  # parse_attributes respectively (nil until those have run).
  attr_reader :tags, :attributes

  BASEDIR = File.dirname( __FILE__ )

  JAVA_OUT = File.join( BASEDIR, '..', 'src',
                        'main', 'java', 'iudex', 'html', 'HTML.java' )

  # Single letter codes of the tags file that are carried over as flag
  # names. Codes without an entry here are dropped by parse_tags.
  FLAGS = {
    'D' => 'DEPRECATED',
    'I' => 'INLINE',
    'M' => 'METADATA',
    'B' => 'BANNED' }

  # Parse both input files, link attributes to tags and write the
  # rendered template to java_file.
  def run( java_file = JAVA_OUT )
    parse_tags
    parse_attributes
    map_basic_attributes
    generate_java( java_file )
  end

  # Read BASEDIR/tags into @tags as records with name, flags (mapped
  # through FLAGS) and desc (nil when the column is empty).
  # Returns [ @tags ]. Raises RuntimeError on a line that is not a
  # comment, blank, or a three column record.
  def parse_tags()
    @tags = []

    File.open( File.join( BASEDIR, 'tags' ), 'r' ) do |fin|
      fin.each do |line|
        case line
        when /^\s*#/, /^\s*$/
          # ignore comment, empty lines
        when /^\s*[^\s,]+\s*,[^,]*,[^,]*$/
          name, codes, desc = split_columns( line )
          flags = codes.split( ' ' ).map { |f| FLAGS[f] }.compact
          @tags << OpenStruct.new( :name  => name,
                                   :flags => flags,
                                   :desc  => desc )
        else
          raise "Parse ERROR: line [#{line}]"
        end
      end
    end

    @tag_max_len = @tags.map { |t| t.name.length }.max
    [ @tags ]
  end

  # Read BASEDIR/attributes into @attributes as records with name,
  # basic_tags and desc. Requires parse_tags to have run first, since
  # "NAME :: ALL except: ..." lines are expanded against @tags.
  # Returns [ @attributes ]. Raises RuntimeError on an unrecognized
  # line.
  def parse_attributes()
    @attributes = []
    tagsets = {}

    File.open( File.join( BASEDIR, 'attributes' ), 'r' ) do |fin|
      fin.each do |line|
        case line
        when /^\s*#/, /^\s*$/
          # ignore comment, empty lines
        when /^\s*([A-Z]+)\s*::\s*ALL\s+except:(.*)$/
          # Named tag set: every known tag name minus the listed ones.
          sname    = Regexp.last_match( 1 )
          excluded = Regexp.last_match( 2 ).split( ' ' )
          tagsets[sname] = @tags.map { |t| t.name } - excluded
        when /^\s*[^\s,]+\s*,/
          name, tag_col, desc = split_columns( line )
          # FIXME: Handle attributes, desc.

          # Entries marked with a leading asterisk are not included as
          # basic tags; tag set names expand to their member tags.
          btags = tag_col.split( ' ' ).reject { |t| t =~ /^\*/ }
          btags = btags.map { |t| tagsets[ t ] || t }.flatten

          @attributes << OpenStruct.new( :name       => name,
                                         :basic_tags => btags,
                                         :desc       => desc )
        else
          raise "Parse ERROR: line [#{line}]"
        end
      end
    end

    @attr_max_len = @attributes.map { |t| t.name.length }.max
    [ @attributes ]
  end

  # Give every tag a basic_atts list: the attributes whose basic_tags
  # include that tag's name. Defined at class level (it was previously
  # nested inside parse_attributes, and so only came into existence
  # once that method had been called).
  def map_basic_attributes()
    @tags.each do |tag|
      tag.basic_atts =
        @attributes.select { |att| att.basic_tags.include?( tag.name ) }
    end
  end

  # Right-pad val with spaces to the longest tag name plus extra.
  # A val already at or beyond that width is returned unpadded.
  def twidth( val, extra = 0 )
    val.ljust( @tag_max_len + extra )
  end

  # Right-pad val with spaces to the longest attribute name plus
  # extra. A val already at or beyond that width is returned unpadded.
  def awidth( val, extra = 0 )
    val.ljust( @attr_max_len + extra )
  end

  # Replace dashes with underscores (used by the template when forming
  # constant names).
  def const( val )
    val.tr( '-', '_' )
  end

  # Render BASEDIR/HTML.java.erb against this instance and write the
  # result to java_file.
  def generate_java( java_file )
    erb_file = File.join( BASEDIR, 'HTML.java.erb' )
    # NOTE(review): the trim mode is still passed positionally, as in
    # the original call; newer ERB releases document a trim_mode
    # keyword instead -- confirm against the Ruby this build targets.
    template = ERB.new( File.read( erb_file ), nil, '%' )

    File.open( java_file, 'w' ) do |fout|
      fout << template.result( binding )
    end
  end

  private

  # Split a CSV-like record into [ name, middle column, description ],
  # each stripped. Columns are kept positionally: an empty middle
  # column comes back as "" instead of being dropped (which used to
  # shift the description into its place), and everything after the
  # second comma stays in the description. An empty or missing
  # description is nil, because the template tests it with `if`.
  def split_columns( line )
    name, middle, desc = line.split( ',', 3 ).map { |c| c.strip }
    desc = nil if desc && desc.empty?
    [ name, middle || '', desc ]
  end

end
|
136
|
+
|
137
|
+
# Script entry point: when this file is executed directly (rather than
# required), run the generator, passing any command line arguments on
# to JavaGenerator#run.
JavaGenerator.new.run( *ARGV ) if __FILE__ == $0
|
data/build/tags
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
# HTML Tags
|
2
|
+
#
|
3
|
+
# Copyright (c) 2010-2011 David Kellum
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
6
|
+
# may not use this file except in compliance with the License. You may
|
7
|
+
# obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
14
|
+
# implied. See the License for the specific language governing
|
15
|
+
# permissions and limitations under the License.
|
16
|
+
#
|
17
|
+
# Sources
|
18
|
+
# http://www.w3.org/TR/xhtml11/
|
19
|
+
# http://www.w3.org/TR/html4/
|
20
|
+
# http://www.w3schools.com/tags/default.asp
|
21
|
+
# http://xhtml.com/
|
22
|
+
#
|
23
|
+
# Codes:
|
24
|
+
# E :: Empty Tag
|
25
|
+
# S :: In Strict HTML 4.01/XHTML 1.0
|
26
|
+
# T :: In Transitional HTML 4.01/XHTML 1.0
|
27
|
+
# F :: In frameset annex
|
28
|
+
# D :: Deprecated
|
29
|
+
# I :: Inline elements (Note <br/> is not labeled inline.)
|
30
|
+
# M :: Metadata elements (content not visible text), i.e. head
|
31
|
+
# B :: Banned/blacklisted elements from which text should not be extracted.
|
32
|
+
|
33
|
+
a , S T F I , anchor
|
34
|
+
abbr , S T F I , abbreviation
|
35
|
+
acronym , S T F I , acronym
|
36
|
+
address , S T F , contact information for the author or owner
|
37
|
+
applet , T F D , embedded applet
|
38
|
+
area ,E S T F , area inside an image-map
|
39
|
+
b , S T F I , bold text
|
40
|
+
base ,E S T F M , default address or a default target for all links on a page
|
41
|
+
basefont ,E T F D I M , default font; color; or size for the text in a page
|
42
|
+
bdo , S T F I , the text direction
|
43
|
+
big , S T F I , big text
|
44
|
+
blockquote , S T F , long quotation
|
45
|
+
body , S T F , the document's body
|
46
|
+
br ,E S T F , single line break
|
47
|
+
button , S T F I B, push button
|
48
|
+
caption , S T F , table caption
|
49
|
+
center , T F D , centered text
|
50
|
+
cite , S T F I , citation
|
51
|
+
code , S T F I , computer code text
|
52
|
+
col ,E S T F , attribute values for one or more columns in a table
|
53
|
+
colgroup , S T F , group of columns in a table for formatting
|
54
|
+
dd , S T F , description of a term in a definition list
|
55
|
+
del , S T F I , deleted text
|
56
|
+
dfn , S T F I , definition term
|
57
|
+
dir , T F D , directory list
|
58
|
+
div , S T F , section in a document
|
59
|
+
dl , S T F , definition list
|
60
|
+
dt , S T F , term (an item) in a definition list
|
61
|
+
em , S T F I , emphasized text
|
62
|
+
fieldset , S T F B, border around elements in a form
|
63
|
+
font , T F D I , font; color; or size for text
|
64
|
+
form , S T F , form for user input
|
65
|
+
frame ,E F B, window (a frame) in a frameset
|
66
|
+
frameset , F B, set of frames
|
67
|
+
h1 , S T F , heading level 1
|
68
|
+
h2 , S T F , heading level 2
|
69
|
+
h3 , S T F , heading level 3
|
70
|
+
h4 , S T F , heading level 4
|
71
|
+
h5 , S T F , heading level 5
|
72
|
+
h6 , S T F , heading level 6
|
73
|
+
head , S T F M , information about the document
|
74
|
+
hr ,E S T F , horizontal line
|
75
|
+
html , S T F , document
|
76
|
+
i , S T F I , italic text
|
77
|
+
iframe , T F , inline frame
|
78
|
+
img ,E S T F I , image
|
79
|
+
input ,E S T F I B, input control
|
80
|
+
ins , S T F I , inserted text
|
81
|
+
isindex , T F D , searchable index related to a document
|
82
|
+
kbd , S T F I , keyboard text
|
83
|
+
label , S T F I B, label for an input element
|
84
|
+
legend , S T F B, caption for a fieldset element
|
85
|
+
li , S T F , list item
|
86
|
+
link ,E S T F M , the relationship between a document and an external resource
|
87
|
+
map , S T F I , image-map
|
88
|
+
menu , T F D , menu list
|
89
|
+
meta ,E S T F M , metadata
|
90
|
+
noframes , T F B, alternate content where frames not supported
|
91
|
+
noscript , S T F B, alternate content script not supported
|
92
|
+
object , S T F I B, embedded object
|
93
|
+
ol , S T F , ordered list
|
94
|
+
optgroup , S T F B, group of related options in a select list
|
95
|
+
option , S T F B, option in a select list
|
96
|
+
p , S T F , paragraph
|
97
|
+
param ,E S T F , parameter for an object
|
98
|
+
pre , S T F , preformatted text
|
99
|
+
q , S T F I , short quotation
|
100
|
+
rb , , ruby base text
|
101
|
+
rbc , , ruby base container (complex)
|
102
|
+
rp , , ruby simple text container
|
103
|
+
rt , , ruby annotation text
|
104
|
+
rtc , , ruby text container (complex)
|
105
|
+
ruby , I , ruby pronunciation aid
|
106
|
+
s , T F D I , strikethrough text
|
107
|
+
samp , S T F I , sample computer code
|
108
|
+
script , S T F I B, client-side script
|
109
|
+
select , S T F I B, select list (drop-down list)
|
110
|
+
small , S T F I , small text
|
111
|
+
span , S T F I , section in a document
|
112
|
+
strike , T F D I , strikethrough text
|
113
|
+
strong , S T F I , strong text
|
114
|
+
style , S T F B, style information for a document
|
115
|
+
sub , S T F I , subscripted text
|
116
|
+
sup , S T F I , superscripted text
|
117
|
+
table , S T F , table
|
118
|
+
tbody , S T F , Groups the body content in a table
|
119
|
+
td , S T F , cell in a table
|
120
|
+
textarea , S T F I B, multi-line text input control
|
121
|
+
tfoot , S T F , Groups the footer content in a table
|
122
|
+
th , S T F , header cell in a table
|
123
|
+
thead , S T F , Groups the header content in a table
|
124
|
+
title , S T F M , the title of a document
|
125
|
+
tr , S T F , row in a table
|
126
|
+
tt , S T F I , teletype text
|
127
|
+
u , T F D I , underlined text
|
128
|
+
ul , S T F , unordered list
|
129
|
+
var , S T F I , variable part of a text
|
130
|
+
xmp , D , preformatted text
|