nokogiri 1.4.7-x86-mingw32 → 1.5.0-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (132) hide show
  1. data/CHANGELOG.ja.rdoc +59 -1
  2. data/CHANGELOG.rdoc +51 -2
  3. data/Manifest.txt +66 -66
  4. data/README.ja.rdoc +4 -0
  5. data/README.rdoc +31 -3
  6. data/Rakefile +111 -135
  7. data/bin/nokogiri +1 -2
  8. data/ext/nokogiri/extconf.rb +9 -3
  9. data/ext/nokogiri/nokogiri.c +21 -3
  10. data/ext/nokogiri/nokogiri.h +0 -2
  11. data/ext/nokogiri/xml_node.c +0 -6
  12. data/ext/nokogiri/xml_relax_ng.c +0 -7
  13. data/ext/nokogiri/xml_sax_parser.c +9 -4
  14. data/ext/nokogiri/xslt_stylesheet.c +5 -1
  15. data/lib/nokogiri.rb +19 -25
  16. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  17. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  18. data/lib/nokogiri/css.rb +0 -1
  19. data/lib/nokogiri/html.rb +1 -0
  20. data/lib/nokogiri/html/document.rb +9 -2
  21. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  22. data/lib/nokogiri/version.rb +76 -31
  23. data/lib/nokogiri/xml/attribute_decl.rb +1 -1
  24. data/lib/nokogiri/xml/builder.rb +7 -0
  25. data/lib/nokogiri/xml/document.rb +17 -1
  26. data/lib/nokogiri/xml/document_fragment.rb +14 -0
  27. data/lib/nokogiri/xml/dtd.rb +1 -1
  28. data/lib/nokogiri/xml/element_decl.rb +1 -1
  29. data/lib/nokogiri/xml/entity_decl.rb +1 -1
  30. data/lib/nokogiri/xml/node.rb +39 -15
  31. data/lib/nokogiri/xml/node/save_options.rb +14 -5
  32. data/lib/nokogiri/xml/node_set.rb +7 -0
  33. data/lib/nokogiri/xml/reader.rb +2 -2
  34. data/lib/nokogiri/xml/sax/document.rb +2 -2
  35. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  36. data/nokogiri_help_responses.md +40 -0
  37. data/tasks/cross_compile.rb +134 -155
  38. data/tasks/nokogiri.org.rb +18 -0
  39. data/tasks/test.rb +2 -2
  40. data/test/files/metacharset.html +10 -0
  41. data/test/files/noencoding.html +47 -0
  42. data/test/helper.rb +2 -1
  43. data/test/html/sax/test_parser.rb +22 -5
  44. data/test/html/test_document.rb +21 -2
  45. data/test/html/test_document_encoding.rb +9 -1
  46. data/test/html/test_document_fragment.rb +5 -3
  47. data/test/html/test_element_description.rb +4 -2
  48. data/test/test_memory_leak.rb +4 -39
  49. data/test/test_nokogiri.rb +14 -20
  50. data/test/test_reader.rb +12 -8
  51. data/test/xml/node/test_save_options.rb +10 -2
  52. data/test/xml/sax/test_parser.rb +14 -7
  53. data/test/xml/test_attribute_decl.rb +7 -3
  54. data/test/xml/test_builder.rb +17 -0
  55. data/test/xml/test_document.rb +31 -4
  56. data/test/xml/test_dtd.rb +13 -3
  57. data/test/xml/test_element_content.rb +1 -1
  58. data/test/xml/test_element_decl.rb +1 -1
  59. data/test/xml/test_entity_decl.rb +12 -10
  60. data/test/xml/test_namespace.rb +7 -5
  61. data/test/xml/test_node.rb +21 -10
  62. data/test/xml/test_node_reparenting.rb +16 -3
  63. data/test/xml/test_node_set.rb +34 -0
  64. data/test/xslt/test_custom_functions.rb +2 -2
  65. data/test/xslt/test_exception_handling.rb +37 -0
  66. metadata +110 -88
  67. data/lib/nokogiri/ffi/encoding_handler.rb +0 -42
  68. data/lib/nokogiri/ffi/html/document.rb +0 -28
  69. data/lib/nokogiri/ffi/html/element_description.rb +0 -81
  70. data/lib/nokogiri/ffi/html/entity_lookup.rb +0 -16
  71. data/lib/nokogiri/ffi/html/sax/parser_context.rb +0 -38
  72. data/lib/nokogiri/ffi/io_callbacks.rb +0 -42
  73. data/lib/nokogiri/ffi/libxml.rb +0 -420
  74. data/lib/nokogiri/ffi/structs/common_node.rb +0 -38
  75. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +0 -24
  76. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +0 -13
  77. data/lib/nokogiri/ffi/structs/xml_alloc.rb +0 -16
  78. data/lib/nokogiri/ffi/structs/xml_attr.rb +0 -20
  79. data/lib/nokogiri/ffi/structs/xml_attribute.rb +0 -27
  80. data/lib/nokogiri/ffi/structs/xml_buffer.rb +0 -16
  81. data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +0 -11
  82. data/lib/nokogiri/ffi/structs/xml_document.rb +0 -117
  83. data/lib/nokogiri/ffi/structs/xml_dtd.rb +0 -28
  84. data/lib/nokogiri/ffi/structs/xml_element.rb +0 -26
  85. data/lib/nokogiri/ffi/structs/xml_element_content.rb +0 -17
  86. data/lib/nokogiri/ffi/structs/xml_entity.rb +0 -32
  87. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +0 -12
  88. data/lib/nokogiri/ffi/structs/xml_node.rb +0 -28
  89. data/lib/nokogiri/ffi/structs/xml_node_set.rb +0 -53
  90. data/lib/nokogiri/ffi/structs/xml_notation.rb +0 -11
  91. data/lib/nokogiri/ffi/structs/xml_ns.rb +0 -15
  92. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +0 -20
  93. data/lib/nokogiri/ffi/structs/xml_parser_input.rb +0 -19
  94. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +0 -14
  95. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +0 -51
  96. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +0 -124
  97. data/lib/nokogiri/ffi/structs/xml_schema.rb +0 -13
  98. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +0 -31
  99. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +0 -12
  100. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +0 -38
  101. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +0 -35
  102. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +0 -20
  103. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +0 -13
  104. data/lib/nokogiri/ffi/weak_bucket.rb +0 -40
  105. data/lib/nokogiri/ffi/xml/attr.rb +0 -41
  106. data/lib/nokogiri/ffi/xml/attribute_decl.rb +0 -27
  107. data/lib/nokogiri/ffi/xml/cdata.rb +0 -19
  108. data/lib/nokogiri/ffi/xml/comment.rb +0 -18
  109. data/lib/nokogiri/ffi/xml/document.rb +0 -174
  110. data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -21
  111. data/lib/nokogiri/ffi/xml/dtd.rb +0 -67
  112. data/lib/nokogiri/ffi/xml/element_content.rb +0 -43
  113. data/lib/nokogiri/ffi/xml/element_decl.rb +0 -19
  114. data/lib/nokogiri/ffi/xml/entity_decl.rb +0 -36
  115. data/lib/nokogiri/ffi/xml/entity_reference.rb +0 -19
  116. data/lib/nokogiri/ffi/xml/namespace.rb +0 -44
  117. data/lib/nokogiri/ffi/xml/node.rb +0 -559
  118. data/lib/nokogiri/ffi/xml/node_set.rb +0 -150
  119. data/lib/nokogiri/ffi/xml/processing_instruction.rb +0 -20
  120. data/lib/nokogiri/ffi/xml/reader.rb +0 -236
  121. data/lib/nokogiri/ffi/xml/relax_ng.rb +0 -85
  122. data/lib/nokogiri/ffi/xml/sax/parser.rb +0 -143
  123. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +0 -79
  124. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +0 -51
  125. data/lib/nokogiri/ffi/xml/schema.rb +0 -109
  126. data/lib/nokogiri/ffi/xml/syntax_error.rb +0 -98
  127. data/lib/nokogiri/ffi/xml/text.rb +0 -18
  128. data/lib/nokogiri/ffi/xml/xpath.rb +0 -9
  129. data/lib/nokogiri/ffi/xml/xpath_context.rb +0 -153
  130. data/lib/nokogiri/ffi/xslt/stylesheet.rb +0 -77
  131. data/lib/nokogiri/version_warning.rb +0 -14
  132. data/test/ffi/test_document.rb +0 -35
data/bin/nokogiri CHANGED
@@ -33,8 +33,7 @@ opts = OptionParser.new do |opts|
33
33
  end
34
34
 
35
35
  opts.on_tail("-v", "--version", "Show version") do
36
- require 'yaml'
37
- puts Nokogiri::VERSION_INFO.to_yaml
36
+ puts Nokogiri::VersionInfo.instance.to_markdown
38
37
  exit
39
38
  end
40
39
  end
@@ -17,9 +17,9 @@ end
17
17
  $CFLAGS << " #{ENV["CFLAGS"]}"
18
18
  $LIBS << " #{ENV["LIBS"]}"
19
19
 
20
- if Config::CONFIG['target_os'] == 'mingw32' || Config::CONFIG['target_os'] =~ /mswin32/
20
+ if RbConfig::CONFIG['target_os'] == 'mingw32' || RbConfig::CONFIG['target_os'] =~ /mswin32/
21
21
  $CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
22
- elsif Config::CONFIG['target_os'] =~ /solaris/
22
+ elsif RbConfig::CONFIG['target_os'] =~ /solaris/
23
23
  $CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
24
24
  else
25
25
  $CFLAGS << " -g -DXP_UNIX"
@@ -33,7 +33,7 @@ if RbConfig::MAKEFILE_CONFIG['CC'] =~ /gcc/
33
33
  $CFLAGS << " -O3 -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
34
34
  end
35
35
 
36
- if Config::CONFIG['target_os'] =~ /mswin32/
36
+ if RbConfig::CONFIG['target_os'] =~ /mswin32/
37
37
  lib_prefix = 'lib'
38
38
 
39
39
  # There's no default include/lib dir on Windows. Let's just add the Ruby ones
@@ -101,6 +101,12 @@ asplode "libxml2" unless find_library("#{lib_prefix}xml2", 'xmlParseDoc')
101
101
  asplode "libxslt" unless find_library("#{lib_prefix}xslt", 'xsltParseStylesheetDoc')
102
102
  asplode "libexslt" unless find_library("#{lib_prefix}exslt", 'exsltFuncRegister')
103
103
 
104
+ unless have_func('xmlHasFeature')
105
+ abort "-----\nThe function 'xmlHasFeature' is missing from your installation of libxml2. Likely this means that your installed version of libxml2 is old enough that nokogiri will not work well. To get around this problem, please upgrade your installation of libxml2.
106
+
107
+ Please visit http://nokogiri.org/tutorials/installing_nokogiri.html for more help!"
108
+ end
109
+
104
110
  have_func 'xmlFirstElementChild'
105
111
  have_func('xmlRelaxNGSetParserStructuredErrors')
106
112
  have_func('xmlRelaxNGSetParserStructuredErrors')
@@ -23,10 +23,28 @@ int vasprintf (char **strp, const char *fmt, va_list ap)
23
23
  }
24
24
  #endif
25
25
 
26
- int is_2_6_16(void)
26
+ #ifdef USING_SYSTEM_ALLOCATOR_LIBRARY /* Ruby Enterprise Edition with tcmalloc */
27
+ void vasprintf_free (void *p)
27
28
  {
28
- return (strcmp(xmlParserVersion, "20616") <= 0) ? 1 : 0 ;
29
+ system_free(p);
29
30
  }
31
+ #else
32
+ void vasprintf_free (void *p)
33
+ {
34
+ free(p);
35
+ }
36
+ #endif
37
+
38
+ #ifndef __MACRUBY__
39
+ /* Allocate strdupped strings with the same memory allocator Ruby uses. */
40
+ static char *ruby_strdup(const char *s)
41
+ {
42
+ size_t len = strlen(s);
43
+ char *result = ruby_xmalloc((ssize_t) (len + 1));
44
+ memcpy(result, s, len + 1);
45
+ return result;
46
+ }
47
+ #endif
30
48
 
31
49
  void Init_nokogiri()
32
50
  {
@@ -35,7 +53,7 @@ void Init_nokogiri()
35
53
  (xmlFreeFunc)ruby_xfree,
36
54
  (xmlMallocFunc)ruby_xmalloc,
37
55
  (xmlReallocFunc)ruby_xrealloc,
38
- strdup
56
+ ruby_strdup
39
57
  );
40
58
  #endif
41
59
 
@@ -36,8 +36,6 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
36
36
  #include <st.h>
37
37
  #endif
38
38
 
39
- int is_2_6_16(void) ;
40
-
41
39
  #ifndef UNUSED
42
40
  # if defined(__GNUC__)
43
41
  # define MAYBE_UNUSED(name) name __attribute__((unused))
@@ -166,12 +166,6 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
166
166
  xmlAddNextSibling(pivot, new_next_text);
167
167
  }
168
168
 
169
- /* TODO: I really want to remove this. We shouldn't support 2.6.16 anymore */
170
- if ( reparentee->type == XML_TEXT_NODE && pivot->type == XML_TEXT_NODE && is_2_6_16() ) {
171
- /* work around a string-handling bug in libxml 2.6.16. we'd rather leak than segfault. */
172
- pivot->content = xmlStrdup(pivot->content);
173
- }
174
-
175
169
  if(!(reparented = (*prf)(pivot, reparentee))) {
176
170
  rb_raise(rb_eRuntimeError, "Could not reparent node");
177
171
  }
@@ -129,9 +129,6 @@ static VALUE from_document(VALUE klass, VALUE document)
129
129
  schema = xmlRelaxNGParse(ctx);
130
130
 
131
131
  xmlSetStructuredErrorFunc(NULL, NULL);
132
- if (! is_2_6_16()) {
133
- xmlRelaxNGFreeParserCtxt(ctx);
134
- }
135
132
 
136
133
  if(NULL == schema) {
137
134
  xmlErrorPtr error = xmlGetLastError();
@@ -143,10 +140,6 @@ static VALUE from_document(VALUE klass, VALUE document)
143
140
  return Qnil;
144
141
  }
145
142
 
146
- if (is_2_6_16()) {
147
- xmlRelaxNGFreeParserCtxt(ctx);
148
- }
149
-
150
143
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
151
144
  rb_iv_set(rb_schema, "@errors", errors);
152
145
 
@@ -1,6 +1,7 @@
1
1
  #include <xml_sax_parser.h>
2
2
 
3
3
  int vasprintf (char **strp, const char *fmt, va_list ap);
4
+ void vasprintf_free (void *p);
4
5
 
5
6
  static ID id_start_document, id_end_document, id_start_element, id_end_element;
6
7
  static ID id_start_element_namespace, id_end_element_namespace;
@@ -198,14 +199,16 @@ static void warning_func(void * ctx, const char *msg, ...)
198
199
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
199
200
  VALUE doc = rb_iv_get(self, "@document");
200
201
  char * message;
202
+ VALUE ruby_message;
201
203
 
202
204
  va_list args;
203
205
  va_start(args, msg);
204
206
  vasprintf(&message, msg, args);
205
207
  va_end(args);
206
208
 
207
- rb_funcall(doc, id_warning, 1, NOKOGIRI_STR_NEW2(message));
208
- free(message);
209
+ ruby_message = NOKOGIRI_STR_NEW2(message);
210
+ vasprintf_free(message);
211
+ rb_funcall(doc, id_warning, 1, ruby_message);
209
212
  }
210
213
 
211
214
  static void error_func(void * ctx, const char *msg, ...)
@@ -213,14 +216,16 @@ static void error_func(void * ctx, const char *msg, ...)
213
216
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
214
217
  VALUE doc = rb_iv_get(self, "@document");
215
218
  char * message;
219
+ VALUE ruby_message;
216
220
 
217
221
  va_list args;
218
222
  va_start(args, msg);
219
223
  vasprintf(&message, msg, args);
220
224
  va_end(args);
221
225
 
222
- rb_funcall(doc, id_error, 1, NOKOGIRI_STR_NEW2(message));
223
- free(message);
226
+ ruby_message = NOKOGIRI_STR_NEW2(message);
227
+ vasprintf_free(message);
228
+ rb_funcall(doc, id_error, 1, ruby_message);
224
229
  }
225
230
 
226
231
  static void cdata_block(void * ctx, const xmlChar * value, int len)
@@ -8,6 +8,7 @@
8
8
  VALUE xslt;
9
9
 
10
10
  int vasprintf (char **strp, const char *fmt, va_list ap);
11
+ void vasprintf_free (void *p);
11
12
 
12
13
  static void dealloc(xsltStylesheetPtr doc)
13
14
  {
@@ -20,13 +21,16 @@ NORETURN(static void xslt_generic_error_handler(void * ctx, const char *msg, ...
20
21
  static void xslt_generic_error_handler(void * ctx, const char *msg, ...)
21
22
  {
22
23
  char * message;
24
+ VALUE exception;
23
25
 
24
26
  va_list args;
25
27
  va_start(args, msg);
26
28
  vasprintf(&message, msg, args);
27
29
  va_end(args);
28
30
 
29
- rb_exc_raise(rb_exc_new2(rb_eRuntimeError, message));
31
+ exception = rb_exc_new2(rb_eRuntimeError, message);
32
+ vasprintf_free(message);
33
+ rb_exc_raise(exception);
30
34
  }
31
35
 
32
36
  /*
data/lib/nokogiri.rb CHANGED
@@ -6,32 +6,26 @@ ENV['PATH'] = [File.expand_path(
6
6
  File.join(File.dirname(__FILE__), "..", "ext", "nokogiri")
7
7
  ), ENV['PATH']].compact.join(';') if RbConfig::CONFIG['host_os'] =~ /(mswin|mingw)/i
8
8
 
9
- if ENV['NOKOGIRI_FFI'] || RUBY_PLATFORM =~ /java/
10
- require 'ffi'
11
- require 'nokogiri/ffi/libxml'
12
- else
13
- require 'nokogiri/nokogiri'
9
+ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
10
+ # The line below caused a problem on non-GAE rack environment.
11
+ # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
12
+ #
13
+ # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
14
+ # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presence
15
+ # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
16
+ # should skip loading xml jars. This is because those are in WEB-INF/lib and
17
+ # already set in the classpath.
18
+ unless $LOAD_PATH.to_s.include?("appengine-rack")
19
+ require 'isorelax.jar'
20
+ require 'jing.jar'
21
+ require 'nekohtml.jar'
22
+ require 'nekodtd.jar'
23
+ require 'xercesImpl.jar'
24
+ end
14
25
  end
15
26
 
16
- warn(<<-eowarn) if '1.8.6' == RUBY_VERSION && $VERBOSE
17
- !!! DEPRECATION WARNING !!!
18
- Hey Champ! I see you're using Ruby 1.8.6! While I applaud you for sticking to
19
- your guns and using The One True Ruby, I have to let you know that we're going
20
- to stop supporting 1.8.6. I know, it's sad. But, we just don't have time to
21
- support every version of Ruby out there. Whether we like it or not, time moves
22
- forward and so does our software.
23
-
24
- On August 1, 2010, we will no longer support Ruby 1.8.6. If nokogiri happens to
25
- work on 1.8.6 after that date, then great! We will hownever, no longer test,
26
- use, or endorse 1.8.6 as a supported platform.
27
-
28
- Thanks,
29
-
30
- Team Nokogiri
31
- eowarn
32
-
27
+ require 'nokogiri/nokogiri'
33
28
  require 'nokogiri/version'
34
- require 'nokogiri/version_warning'
35
29
  require 'nokogiri/syntax_error'
36
30
  require 'nokogiri/xml'
37
31
  require 'nokogiri/xslt'
@@ -73,13 +67,13 @@ module Nokogiri
73
67
  doc =
74
68
  if string.respond_to?(:read) ||
75
69
  string =~ /^\s*<[^Hh>]*html/i # Probably html
76
- Nokogiri::HTML(
70
+ Nokogiri.HTML(
77
71
  string,
78
72
  url,
79
73
  encoding, options || XML::ParseOptions::DEFAULT_HTML
80
74
  )
81
75
  else
82
- Nokogiri::XML(string, url, encoding,
76
+ Nokogiri.XML(string, url, encoding,
83
77
  options || XML::ParseOptions::DEFAULT_XML)
84
78
  end
85
79
  yield doc if block_given?
Binary file
Binary file
data/lib/nokogiri/css.rb CHANGED
@@ -1,6 +1,5 @@
1
1
  require 'nokogiri/css/node'
2
2
  require 'nokogiri/css/xpath_visitor'
3
-
4
3
  x = $-w
5
4
  $-w = false
6
5
  require 'nokogiri/css/parser'
data/lib/nokogiri/html.rb CHANGED
@@ -4,6 +4,7 @@ require 'nokogiri/html/document_fragment'
4
4
  require 'nokogiri/html/sax/parser_context'
5
5
  require 'nokogiri/html/sax/parser'
6
6
  require 'nokogiri/html/element_description'
7
+ require 'nokogiri/html/element_description_defaults'
7
8
 
8
9
  module Nokogiri
9
10
  class << self
@@ -70,7 +70,7 @@ module Nokogiri
70
70
 
71
71
  class << self
72
72
  ###
73
- # Parse HTML. +thing+ may be a String, or any object that
73
+ # Parse HTML. +string_or_io+ may be a String, or any object that
74
74
  # responds to _read_ and _close_ such as an IO, or StringIO.
75
75
  # +url+ is resource where this document is located. +encoding+ is the
76
76
  # encoding that should be used when processing the document. +options+
@@ -147,6 +147,8 @@ module Nokogiri
147
147
  not_found
148
148
  when 'meta'
149
149
  attr = Hash[attrs]
150
+ charset = attr['charset'] and
151
+ found charset
150
152
  http_equiv = attr['http-equiv'] and
151
153
  http_equiv.match(/\AContent-Type\z/i) and
152
154
  content = attr['content'] and
@@ -160,13 +162,18 @@ module Nokogiri
160
162
  m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
161
163
  return Nokogiri.XML(m[1]).encoding
162
164
 
165
+ if Nokogiri.jruby?
166
+ m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
167
+ return m[4]
168
+ end
169
+
163
170
  handler = SAXHandler.new
164
171
  parser = Nokogiri::HTML::SAX::Parser.new(handler)
165
172
  catch(:found) {
166
173
  parser.parse(chunk)
167
174
  }
168
175
  handler.encoding
169
- rescue => e
176
+ rescue
170
177
  nil
171
178
  end
172
179
 
@@ -0,0 +1,671 @@
1
+ module Nokogiri
2
+ module HTML
3
+ class ElementDescription
4
+
5
+ # Method definitions are guarded by method_defined? because at
6
+ # this point the C-library or Java library is already loaded,
7
+ # and we don't want to clobber any methods that have been
8
+ # defined there.
9
+
10
+ Desc = Struct.new("HTMLElementDescription", :name,
11
+ :startTag, :endTag, :saveEndTag,
12
+ :empty, :depr, :dtd, :isinline,
13
+ :desc,
14
+ :subelts, :defaultsubelt,
15
+ :attrs_opt, :attrs_depr, :attrs_req)
16
+
17
+ # This is filled in down below.
18
+ DefaultDescriptions = Hash.new()
19
+
20
+ def default_desc
21
+ DefaultDescriptions[name.downcase]
22
+ end
23
+ private :default_desc
24
+
25
+ unless method_defined? :implied_start_tag?
26
+ def implied_start_tag?
27
+ d = default_desc
28
+ d ? d.startTag : nil
29
+ end
30
+ end
31
+
32
+ unless method_defined? :implied_end_tag?
33
+ def implied_end_tag?
34
+ d = default_desc
35
+ d ? d.endTag : nil
36
+ end
37
+ end
38
+
39
+ unless method_defined? :save_end_tag?
40
+ def save_end_tag?
41
+ d = default_desc
42
+ d ? d.saveEndTag : nil
43
+ end
44
+ end
45
+
46
+ unless method_defined? :deprecated?
47
+ def deprecated?
48
+ d = default_desc
49
+ d ? d.depr : nil
50
+ end
51
+ end
52
+
53
+ unless method_defined? :description
54
+ def description
55
+ d = default_desc
56
+ d ? d.desc : nil
57
+ end
58
+ end
59
+
60
+ unless method_defined? :default_sub_element
61
+ def default_sub_element
62
+ d = default_desc
63
+ d ? d.defaultsubelt : nil
64
+ end
65
+ end
66
+
67
+ unless method_defined? :optional_attributes
68
+ def optional_attributes
69
+ d = default_desc
70
+ d ? d.attrs_opt : []
71
+ end
72
+ end
73
+
74
+ unless method_defined? :deprecated_attributes
75
+ def deprecated_attributes
76
+ d = default_desc
77
+ d ? d.attrs_depr : []
78
+ end
79
+ end
80
+
81
+ unless method_defined? :required_attributes
82
+ def required_attributes
83
+ d = default_desc
84
+ d ? d.attrs_req : []
85
+ end
86
+ end
87
+
88
+ ###
89
+ # Default Element Descriptions (HTML 4.0) copied from
90
+ # libxml2/HTMLparser.c and libxml2/include/libxml/HTMLparser.h
91
+ #
92
+ # The copyright notice for those files and the following list of
93
+ # element and attribute descriptions is reproduced here:
94
+ #
95
+ # Except where otherwise noted in the source code (e.g. the
96
+ # files hash.c, list.c and the trio files, which are covered by
97
+ # a similar licence but with different Copyright notices) all
98
+ # the files are:
99
+ #
100
+ # Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved.
101
+ #
102
+ # Permission is hereby granted, free of charge, to any person
103
+ # obtaining a copy of this software and associated documentation
104
+ # files (the "Software"), to deal in the Software without
105
+ # restriction, including without limitation the rights to use,
106
+ # copy, modify, merge, publish, distribute, sublicense, and/or
107
+ # sell copies of the Software, and to permit persons to whom the
108
+ # Software is furnished to do so, subject to the following
109
+ # conditions:
110
+
111
+ # The above copyright notice and this permission notice shall be
112
+ # included in all copies or substantial portions of the
113
+ # Software.
114
+
115
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
116
+ # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
117
+ # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
118
+ # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE DANIEL
119
+ # VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
120
+ # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
121
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
122
+ # OR OTHER DEALINGS IN THE SOFTWARE.
123
+
124
+ # Except as contained in this notice, the name of Daniel
125
+ # Veillard shall not be used in advertising or otherwise to
126
+ # promote the sale, use or other dealings in this Software
127
+ # without prior written authorization from him.
128
+
129
+ # Attributes defined and categorized
130
+ FONTSTYLE = ["tt", "i", "b", "u", "s", "strike", "big", "small"]
131
+ PHRASE = ['em', 'strong', 'dfn', 'code', 'samp',
132
+ 'kbd', 'var', 'cite', 'abbr', 'acronym']
133
+ SPECIAL = ['a', 'img', 'applet', 'embed', 'object', 'font','basefont',
134
+ 'br', 'script', 'map', 'q', 'sub', 'sup', 'span', 'bdo',
135
+ 'iframe']
136
+ PCDATA = []
137
+ HEADING = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
138
+ LIST = ['ul', 'ol', 'dir', 'menu']
139
+ FORMCTRL = ['input', 'select', 'textarea', 'label', 'button']
140
+ BLOCK = [HEADING, LIST, 'pre', 'p', 'dl', 'div', 'center', 'noscript',
141
+ 'noframes', 'blockquote', 'form', 'isindex', 'hr', 'table',
142
+ 'fieldset', 'address']
143
+ INLINE = [PCDATA, FONTSTYLE, PHRASE, SPECIAL, FORMCTRL]
144
+ FLOW = [BLOCK, INLINE]
145
+ MODIFIER = []
146
+ EMPTY = []
147
+
148
+ HTML_FLOW = FLOW
149
+ HTML_INLINE = INLINE
150
+ HTML_PCDATA = PCDATA
151
+ HTML_CDATA = HTML_PCDATA
152
+
153
+ COREATTRS = ['id', 'class', 'style', 'title']
154
+ I18N = ['lang', 'dir']
155
+ EVENTS = ['onclick', 'ondblclick', 'onmousedown', 'onmouseup',
156
+ 'onmouseover', 'onmouseout', 'onkeypress', 'onkeydown',
157
+ 'onkeyup']
158
+ ATTRS = [COREATTRS, I18N,EVENTS]
159
+ CELLHALIGN = ['align', 'char', 'charoff']
160
+ CELLVALIGN = ['valign']
161
+
162
+ HTML_ATTRS = ATTRS
163
+ CORE_I18N_ATTRS = [COREATTRS, I18N]
164
+ CORE_ATTRS = COREATTRS
165
+ I18N_ATTRS = I18N
166
+
167
+
168
+ A_ATTRS = [ATTRS, 'charset', 'type', 'name',
169
+ 'href', 'hreflang', 'rel', 'rev', 'accesskey', 'shape',
170
+ 'coords', 'tabindex', 'onfocus', 'onblur']
171
+ TARGET_ATTR = ['target']
172
+ ROWS_COLS_ATTR = ['rows', 'cols']
173
+ ALT_ATTR = ['alt']
174
+ SRC_ALT_ATTRS = ['src', 'alt']
175
+ HREF_ATTRS = ['href']
176
+ CLEAR_ATTRS = ['clear']
177
+ INLINE_P = [INLINE, 'p']
178
+
179
+ FLOW_PARAM = [FLOW, 'param']
180
+ APPLET_ATTRS = [COREATTRS , 'codebase',
181
+ 'archive', 'alt', 'name', 'height', 'width', 'align',
182
+ 'hspace', 'vspace']
183
+ AREA_ATTRS = ['shape', 'coords', 'href', 'nohref',
184
+ 'tabindex', 'accesskey', 'onfocus', 'onblur']
185
+ BASEFONT_ATTRS = ['id', 'size', 'color', 'face']
186
+ QUOTE_ATTRS = [ATTRS, 'cite']
187
+ BODY_CONTENTS = [FLOW, 'ins', 'del']
188
+ BODY_ATTRS = [ATTRS, 'onload', 'onunload']
189
+ BODY_DEPR = ['background', 'bgcolor', 'text',
190
+ 'link', 'vlink', 'alink']
191
+ BUTTON_ATTRS = [ATTRS, 'name', 'value', 'type',
192
+ 'disabled', 'tabindex', 'accesskey', 'onfocus', 'onblur']
193
+
194
+
195
+ COL_ATTRS = [ATTRS, 'span', 'width', CELLHALIGN, CELLVALIGN]
196
+ COL_ELT = ['col']
197
+ EDIT_ATTRS = [ATTRS, 'datetime', 'cite']
198
+ COMPACT_ATTRS = [ATTRS, 'compact']
199
+ DL_CONTENTS = ['dt', 'dd']
200
+ COMPACT_ATTR = ['compact']
201
+ LABEL_ATTR = ['label']
202
+ FIELDSET_CONTENTS = [FLOW, 'legend' ]
203
+ FONT_ATTRS = [COREATTRS, I18N, 'size', 'color', 'face' ]
204
+ FORM_CONTENTS = [HEADING, LIST, INLINE, 'pre', 'p', 'div', 'center',
205
+ 'noscript', 'noframes', 'blockquote', 'isindex', 'hr',
206
+ 'table', 'fieldset', 'address']
207
+ FORM_ATTRS = [ATTRS, 'method', 'enctype', 'accept', 'name', 'onsubmit',
208
+ 'onreset', 'accept-charset']
209
+ FRAME_ATTRS = [COREATTRS, 'longdesc', 'name', 'src', 'frameborder',
210
+ 'marginwidth', 'marginheight', 'noresize', 'scrolling' ]
211
+ FRAMESET_ATTRS = [COREATTRS, 'rows', 'cols', 'onload', 'onunload']
212
+ FRAMESET_CONTENTS = ['frameset', 'frame', 'noframes']
213
+ HEAD_ATTRS = [I18N, 'profile']
214
+ HEAD_CONTENTS = ['title', 'isindex', 'base', 'script', 'style', 'meta',
215
+ 'link', 'object']
216
+ HR_DEPR = ['align', 'noshade', 'size', 'width']
217
+ VERSION_ATTR = ['version']
218
+ HTML_CONTENT = ['head', 'body', 'frameset']
219
+ IFRAME_ATTRS = [COREATTRS, 'longdesc', 'name', 'src', 'frameborder',
220
+ 'marginwidth', 'marginheight', 'scrolling', 'align',
221
+ 'height', 'width']
222
+ IMG_ATTRS = [ATTRS, 'longdesc', 'name', 'height', 'width', 'usemap',
223
+ 'ismap']
224
+ EMBED_ATTRS = [COREATTRS, 'align', 'alt', 'border', 'code', 'codebase',
225
+ 'frameborder', 'height', 'hidden', 'hspace', 'name',
226
+ 'palette', 'pluginspace', 'pluginurl', 'src', 'type',
227
+ 'units', 'vspace', 'width']
228
+ INPUT_ATTRS = [ATTRS, 'type', 'name', 'value', 'checked', 'disabled',
229
+ 'readonly', 'size', 'maxlength', 'src', 'alt', 'usemap',
230
+ 'ismap', 'tabindex', 'accesskey', 'onfocus', 'onblur',
231
+ 'onselect', 'onchange', 'accept']
232
+ PROMPT_ATTRS = [COREATTRS, I18N, 'prompt']
233
+ LABEL_ATTRS = [ATTRS, 'for', 'accesskey', 'onfocus', 'onblur']
234
+ LEGEND_ATTRS = [ATTRS, 'accesskey']
235
+ ALIGN_ATTR = ['align']
236
+ LINK_ATTRS = [ATTRS, 'charset', 'href', 'hreflang', 'type', 'rel', 'rev',
237
+ 'media']
238
+ MAP_CONTENTS = [BLOCK, 'area']
239
+ NAME_ATTR = ['name']
240
+ ACTION_ATTR = ['action']
241
+ BLOCKLI_ELT = [BLOCK, 'li']
242
+ META_ATTRS = [I18N, 'http-equiv', 'name', 'scheme']
243
+ CONTENT_ATTR = ['content']
244
+ TYPE_ATTR = ['type']
245
+ NOFRAMES_CONTENT = ['body', FLOW, MODIFIER]
246
+ OBJECT_CONTENTS = [FLOW, 'param']
247
+ OBJECT_ATTRS = [ATTRS, 'declare', 'classid', 'codebase', 'data', 'type',
248
+ 'codetype', 'archive', 'standby', 'height', 'width',
249
+ 'usemap', 'name', 'tabindex']
250
+ OBJECT_DEPR = ['align', 'border', 'hspace', 'vspace']
251
+ OL_ATTRS = ['type', 'compact', 'start']
252
+ OPTION_ELT = ['option']
253
+ OPTGROUP_ATTRS = [ATTRS, 'disabled']
254
+ OPTION_ATTRS = [ATTRS, 'disabled', 'label', 'selected', 'value']
255
+ PARAM_ATTRS = ['id', 'value', 'valuetype', 'type']
256
+ WIDTH_ATTR = ['width']
257
+ PRE_CONTENT = [PHRASE, 'tt', 'i', 'b', 'u', 's', 'strike', 'a', 'br',
258
+ 'script', 'map', 'q', 'span', 'bdo', 'iframe']
259
+ SCRIPT_ATTRS = ['charset', 'src', 'defer', 'event', 'for']
260
+ LANGUAGE_ATTR = ['language']
261
+ SELECT_CONTENT = ['optgroup', 'option']
262
+ SELECT_ATTRS = [ATTRS, 'name', 'size', 'multiple', 'disabled', 'tabindex',
263
+ 'onfocus', 'onblur', 'onchange']
264
+ STYLE_ATTRS = [I18N, 'media', 'title']
265
+ TABLE_ATTRS = [ATTRS, 'summary', 'width', 'border', 'frame', 'rules',
266
+ 'cellspacing', 'cellpadding', 'datapagesize']
267
+ TABLE_DEPR = ['align', 'bgcolor']
268
+ TABLE_CONTENTS = ['caption', 'col', 'colgroup', 'thead', 'tfoot', 'tbody',
269
+ 'tr']
270
+ TR_ELT = ['tr']
271
+ TALIGN_ATTRS = [ATTRS, CELLHALIGN, CELLVALIGN]
272
+ TH_TD_DEPR = ['nowrap', 'bgcolor', 'width', 'height']
273
+ TH_TD_ATTR = [ATTRS, 'abbr', 'axis', 'headers', 'scope', 'rowspan',
274
+ 'colspan', CELLHALIGN, CELLVALIGN]
275
+ TEXTAREA_ATTRS = [ATTRS, 'name', 'disabled', 'readonly', 'tabindex',
276
+ 'accesskey', 'onfocus', 'onblur', 'onselect',
277
+ 'onchange']
278
+ TR_CONTENTS = ['th', 'td']
279
+ BGCOLOR_ATTR = ['bgcolor']
280
+ LI_ELT = ['li']
281
+ UL_DEPR = ['type', 'compact']
282
+ DIR_ATTR = ['dir']
283
+
284
+ [
285
+ ['a', false, false, false, false, false, :any, true,
286
+ 'anchor ',
287
+ HTML_INLINE, nil, A_ATTRS, TARGET_ATTR, []
288
+ ],
289
+ ['abbr', false, false, false, false, false, :any, true,
290
+ 'abbreviated form',
291
+ HTML_INLINE, nil, HTML_ATTRS, [], []
292
+ ],
293
+ ['acronym', false, false, false, false, false, :any, true, '',
294
+ HTML_INLINE, nil, HTML_ATTRS, [], []
295
+ ],
296
+ ['address', false, false, false, false, false, :any, false,
297
+ 'information on author',
298
+ INLINE_P , nil, HTML_ATTRS, [], []
299
+ ],
300
+ ['applet', false, false, false, false, true, :loose, true,
301
+ 'java applet ',
302
+ FLOW_PARAM, nil, [], APPLET_ATTRS, []
303
+ ],
304
+ ['area', false, true, true, true, false, :any, false,
305
+ 'client-side image map area ',
306
+ EMPTY, nil, AREA_ATTRS, TARGET_ATTR, ALT_ATTR
307
+ ],
308
+ ['b', false, true, false, false, false, :any, true,
309
+ 'bold text style',
310
+ HTML_INLINE, nil, HTML_ATTRS, [], []
311
+ ],
312
+ ['base', false, true, true, true, false, :any, false,
313
+ 'document base uri ',
314
+ EMPTY, nil, [], TARGET_ATTR, HREF_ATTRS
315
+ ],
316
+ ['basefont', false, true, true, true, true, :loose, true,
317
+ 'base font size ',
318
+ EMPTY, nil, [], BASEFONT_ATTRS, []
319
+ ],
320
+ ['bdo', false, false, false, false, false, :any, true,
321
+ 'i18n bidi over-ride ',
322
+ HTML_INLINE, nil, CORE_I18N_ATTRS, [], DIR_ATTR
323
+ ],
324
+ ['big', false, true, false, false, false, :any, true,
325
+ 'large text style',
326
+ HTML_INLINE, nil, HTML_ATTRS, [], []
327
+ ],
328
+ ['blockquote', false, false, false, false, false, :any, false,
329
+ 'long quotation ',
330
+ HTML_FLOW, nil, QUOTE_ATTRS, [], []
331
+ ],
332
+ ['body', true, true, false, false, false, :any, false,
333
+ 'document body ',
334
+ BODY_CONTENTS, 'div', BODY_ATTRS, BODY_DEPR, []
335
+ ],
336
+ ['br', false, true, true, true, false, :any, true,
337
+ 'forced line break ',
338
+ EMPTY, nil, CORE_ATTRS, CLEAR_ATTRS, []
339
+ ],
340
+ ['button', false, false, false, false, false, :any, true,
341
+ 'push button ',
342
+ [HTML_FLOW, MODIFIER], nil, BUTTON_ATTRS, [], []
343
+ ],
344
+ ['caption', false, false, false, false, false, :any, false,
345
+ 'table caption ',
346
+ HTML_INLINE, nil, HTML_ATTRS, [], []
347
+ ],
348
+ ['center', false, true, false, false, true, :loose, false,
349
+ 'shorthand for div align=center ',
350
+ HTML_FLOW, nil, [], HTML_ATTRS, []
351
+ ],
352
+ ['cite', false, false, false, false, false, :any, true, 'citation',
353
+ HTML_INLINE, nil, HTML_ATTRS, [], []
354
+ ],
355
+ ['code', false, false, false, false, false, :any, true,
356
+ 'computer code fragment',
357
+ HTML_INLINE, nil, HTML_ATTRS, [], []
358
+ ],
359
+ ['col', false, true, true, true, false, :any, false, 'table column ',
360
+ EMPTY, nil, COL_ATTRS, [], []
361
+ ],
362
+ ['colgroup', false, true, false, false, false, :any, false,
363
+ 'table column group ',
364
+ COL_ELT, 'col', COL_ATTRS, [], []
365
+ ],
366
+ ['dd', false, true, false, false, false, :any, false,
367
+ 'definition description ',
368
+ HTML_FLOW, nil, HTML_ATTRS, [], []
369
+ ],
370
+ ['del', false, false, false, false, false, :any, true,
371
+ 'deleted text ',
372
+ HTML_FLOW, nil, EDIT_ATTRS, [], []
373
+ ],
374
+ ['dfn', false, false, false, false, false, :any, true,
375
+ 'instance definition',
376
+ HTML_INLINE, nil, HTML_ATTRS, [], []
377
+ ],
378
+ ['dir', false, false, false, false, true, :loose, false,
379
+ 'directory list',
380
+ BLOCKLI_ELT, 'li', [], COMPACT_ATTRS, []
381
+ ],
382
+ ['div', false, false, false, false, false, :any, false,
383
+ 'generic language/style container',
384
+ HTML_FLOW, nil, HTML_ATTRS, ALIGN_ATTR, []
385
+ ],
386
+ ['dl', false, false, false, false, false, :any, false,
387
+ 'definition list ',
388
+ DL_CONTENTS, 'dd', HTML_ATTRS, COMPACT_ATTR, []
389
+ ],
390
+ ['dt', false, true, false, false, false, :any, false,
391
+ 'definition term ',
392
+ HTML_INLINE, nil, HTML_ATTRS, [], []
393
+ ],
394
+ ['em', false, true, false, false, false, :any, true,
395
+ 'emphasis',
396
+ HTML_INLINE, nil, HTML_ATTRS, [], []
397
+ ],
398
+ ['embed', false, true, false, false, true, :loose, true,
399
+ 'generic embedded object ',
400
+ EMPTY, nil, EMBED_ATTRS, [], []
401
+ ],
402
+ ['fieldset', false, false, false, false, false, :any, false,
403
+ 'form control group ',
404
+ FIELDSET_CONTENTS, nil, HTML_ATTRS, [], []
405
+ ],
406
+ ['font', false, true, false, false, true, :loose, true,
407
+ 'local change to font ',
408
+ HTML_INLINE, nil, [], FONT_ATTRS, []
409
+ ],
410
+ ['form', false, false, false, false, false, :any, false,
411
+ 'interactive form ',
412
+ FORM_CONTENTS, 'fieldset', FORM_ATTRS, TARGET_ATTR, ACTION_ATTR
413
+ ],
414
+ ['frame', false, true, true, true, false, :frameset, false,
415
+ 'subwindow ',
416
+ EMPTY, nil, [], FRAME_ATTRS, []
417
+ ],
418
+ ['frameset', false, false, false, false, false, :frameset, false,
419
+ 'window subdivision',
420
+ FRAMESET_CONTENTS, 'noframes', [], FRAMESET_ATTRS, []
421
+ ],
422
+ ['h1', false, false, false, false, false, :any, false,
423
+ 'heading ',
424
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, []
425
+ ],
426
+ ['h2', false, false, false, false, false, :any, false,
427
+ 'heading ',
428
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, []
429
+ ],
430
+ ['h3', false, false, false, false, false, :any, false,
431
+ 'heading ',
432
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, []
433
+ ],
434
+ ['h4', false, false, false, false, false, :any, false,
435
+ 'heading ',
436
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, []
437
+ ],
438
+ ['h5', false, false, false, false, false, :any, false,
439
+ 'heading ',
440
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, []
441
+ ],
442
+ ['h6', false, false, false, false, false, :any, false,
443
+ 'heading ',
444
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, []
445
+ ],
446
+ ['head', true, true, false, false, false, :any, false,
447
+ 'document head ',
448
+ HEAD_CONTENTS, nil, HEAD_ATTRS, [], []
449
+ ],
450
+ ['hr', false, true, true, true, false, :any, false,
451
+ 'horizontal rule ',
452
+ EMPTY, nil, HTML_ATTRS, HR_DEPR, []
453
+ ],
454
+ ['html', true, true, false, false, false, :any, false,
455
+ 'document root element ',
456
+ HTML_CONTENT, nil, I18N_ATTRS, VERSION_ATTR, []
457
+ ],
458
+ ['i', false, true, false, false, false, :any, true,
459
+ 'italic text style',
460
+ HTML_INLINE, nil, HTML_ATTRS, [], []
461
+ ],
462
+ ['iframe', false, false, false, false, false, :any, true,
463
+ 'inline subwindow ',
464
+ HTML_FLOW, nil, [], IFRAME_ATTRS, []
465
+ ],
466
+ ['img', false, true, true, true, false, :any, true,
467
+ 'embedded image ',
468
+ EMPTY, nil, IMG_ATTRS, ALIGN_ATTR, SRC_ALT_ATTRS
469
+ ],
470
+ ['input', false, true, true, true, false, :any, true,
471
+ 'form control ',
472
+ EMPTY, nil, INPUT_ATTRS, ALIGN_ATTR, []
473
+ ],
474
+ ['ins', false, false, false, false, false, :any, true,
475
+ 'inserted text',
476
+ HTML_FLOW, nil, EDIT_ATTRS, [], []
477
+ ],
478
+ ['isindex', false, true, true, true, true, :loose, false,
479
+ 'single line prompt ',
480
+ EMPTY, nil, [], PROMPT_ATTRS, []
481
+ ],
482
+ ['kbd', false, false, false, false, false, :any, true,
483
+ 'text to be entered by the user',
484
+ HTML_INLINE, nil, HTML_ATTRS, [], []
485
+ ],
486
+ ['label', false, false, false, false, false, :any, true,
487
+ 'form field label text ',
488
+ [HTML_INLINE, MODIFIER], nil, LABEL_ATTRS, [], []
489
+ ],
490
+ ['legend', false, false, false, false, false, :any, false,
491
+ 'fieldset legend ',
492
+ HTML_INLINE, nil, LEGEND_ATTRS, ALIGN_ATTR, []
493
+ ],
494
+ ['li', false, true, true, false, false, :any, false,
495
+ 'list item ',
496
+ HTML_FLOW, nil, HTML_ATTRS, [], []
497
+ ],
498
+ ['link', false, true, true, true, false, :any, false,
499
+ 'a media-independent link ',
500
+ EMPTY, nil, LINK_ATTRS, TARGET_ATTR, []
501
+ ],
502
+ ['map', false, false, false, false, false, :any, true,
503
+ 'client-side image map ',
504
+ MAP_CONTENTS, nil, HTML_ATTRS, [], NAME_ATTR
505
+ ],
506
+ ['menu', false, false, false, false, true, :loose, false,
507
+ 'menu list ',
508
+ BLOCKLI_ELT, nil, [], COMPACT_ATTRS, []
509
+ ],
510
+ ['meta', false, true, true, true, false, :any, false,
511
+ 'generic metainformation ',
512
+ EMPTY, nil, META_ATTRS, [], CONTENT_ATTR
513
+ ],
514
+ ['noframes', false, false, false, false, false, :frameset, false,
515
+ 'alternate content container for non frame-based rendering ',
516
+ NOFRAMES_CONTENT, 'body', HTML_ATTRS, [], []
517
+ ],
518
+ ['noscript', false, false, false, false, false, :any, false,
519
+ 'alternate content container for non script-based rendering ',
520
+ HTML_FLOW, 'div', HTML_ATTRS, [], []
521
+ ],
522
+ ['object', false, false, false, false, false, :any, true,
523
+ 'generic embedded object ',
524
+ OBJECT_CONTENTS, 'div', OBJECT_ATTRS, OBJECT_DEPR, []
525
+ ],
526
+ ['ol', false, false, false, false, false, :any, false,
527
+ 'ordered list ',
528
+ LI_ELT, 'li', HTML_ATTRS, OL_ATTRS, []
529
+ ],
530
+ ['optgroup', false, false, false, false, false, :any, false,
531
+ 'option group ',
532
+ OPTION_ELT, 'option', OPTGROUP_ATTRS, [], LABEL_ATTR
533
+ ],
534
+ ['option', false, true, false, false, false, :any, false,
535
+ 'selectable choice ',
536
+ HTML_PCDATA, nil, OPTION_ATTRS, [], []
537
+ ],
538
+ ['p', false, true, false, false, false, :any, false,
539
+ 'paragraph ',
540
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, []
541
+ ],
542
+ ['param', false, true, true, true, false, :any, false,
543
+ 'named property value ',
544
+ EMPTY, nil, PARAM_ATTRS, [], NAME_ATTR
545
+ ],
546
+ ['pre', false, false, false, false, false, :any, false,
547
+ 'preformatted text ',
548
+ PRE_CONTENT, nil, HTML_ATTRS, WIDTH_ATTR, []
549
+ ],
550
+ ['q', false, false, false, false, false, :any, true,
551
+ 'short inline quotation ',
552
+ HTML_INLINE, nil, QUOTE_ATTRS, [], []
553
+ ],
554
+ ['s', false, true, false, false, true, :loose, true,
555
+ 'strike-through text style',
556
+ HTML_INLINE, nil, [], HTML_ATTRS, []
557
+ ],
558
+ ['samp', false, false, false, false, false, :any, true,
559
+ 'sample program output, scripts, etc.',
560
+ HTML_INLINE, nil, HTML_ATTRS, [], []
561
+ ],
562
+ ['script', false, false, false, false, false, :any, true,
563
+ 'script statements ',
564
+ HTML_CDATA, nil, SCRIPT_ATTRS, LANGUAGE_ATTR, TYPE_ATTR
565
+ ],
566
+ ['select', false, false, false, false, false, :any, true,
567
+ 'option selector ',
568
+ SELECT_CONTENT, nil, SELECT_ATTRS, [], []
569
+ ],
570
+ ['small', false, true, false, false, false, :any, true,
571
+ 'small text style',
572
+ HTML_INLINE, nil, HTML_ATTRS, [], []
573
+ ],
574
+ ['span', false, false, false, false, false, :any, true,
575
+ 'generic language/style container ',
576
+ HTML_INLINE, nil, HTML_ATTRS, [], []
577
+ ],
578
+ ['strike', false, true, false, false, true, :loose, true,
579
+ 'strike-through text',
580
+ HTML_INLINE, nil, [], HTML_ATTRS, []
581
+ ],
582
+ ['strong', false, true, false, false, false, :any, true,
583
+ 'strong emphasis',
584
+ HTML_INLINE, nil, HTML_ATTRS, [], []
585
+ ],
586
+ ['style', false, false, false, false, false, :any, false,
587
+ 'style info ',
588
+ HTML_CDATA, nil, STYLE_ATTRS, [], TYPE_ATTR
589
+ ],
590
+ ['sub', false, true, false, false, false, :any, true,
591
+ 'subscript',
592
+ HTML_INLINE, nil, HTML_ATTRS, [], []
593
+ ],
594
+ ['sup', false, true, false, false, false, :any, true,
595
+ 'superscript ',
596
+ HTML_INLINE, nil, HTML_ATTRS, [], []
597
+ ],
598
+ ['table', false, false, false, false, false, :any, false,
599
+ '',
600
+ TABLE_CONTENTS, 'tr', TABLE_ATTRS, TABLE_DEPR, []
601
+ ],
602
+ ['tbody', true, false, false, false, false, :any, false,
603
+ 'table body ',
604
+ TR_ELT, 'tr', TALIGN_ATTRS, [], []
605
+ ],
606
+ ['td', false, false, false, false, false, :any, false,
607
+ 'table data cell',
608
+ HTML_FLOW, nil, TH_TD_ATTR, TH_TD_DEPR, []
609
+ ],
610
+ ['textarea', false, false, false, false, false, :any, true,
611
+ 'multi-line text field ',
612
+ HTML_PCDATA, nil, TEXTAREA_ATTRS, [], ROWS_COLS_ATTR
613
+ ],
614
+ ['tfoot', false, true, false, false, false, :any, false,
615
+ 'table footer ',
616
+ TR_ELT, 'tr', TALIGN_ATTRS, [], []
617
+ ],
618
+ ['th', false, true, false, false, false, :any, false,
619
+ 'table header cell',
620
+ HTML_FLOW, nil, TH_TD_ATTR, TH_TD_DEPR, []
621
+ ],
622
+ ['thead', false, true, false, false, false, :any, false,
623
+ 'table header ',
624
+ TR_ELT, 'tr', TALIGN_ATTRS, [], []
625
+ ],
626
+ ['title', false, false, false, false, false, :any, false,
627
+ 'document title ',
628
+ HTML_PCDATA, nil, I18N_ATTRS, [], []
629
+ ],
630
+ ['tr', false, false, false, false, false, :any, false,
631
+ 'table row ',
632
+ TR_CONTENTS, 'td', TALIGN_ATTRS, BGCOLOR_ATTR, []
633
+ ],
634
+ ['tt', false, true, false, false, false, :any, true,
635
+ 'teletype or monospaced text style',
636
+ HTML_INLINE, nil, HTML_ATTRS, [], []
637
+ ],
638
+ ['u', false, true, false, false, true, :loose, true,
639
+ 'underlined text style',
640
+ HTML_INLINE, nil, [], HTML_ATTRS, []
641
+ ],
642
+ ['ul', false, false, false, false, false, :any, false,
643
+ 'unordered list ',
644
+ LI_ELT, 'li', HTML_ATTRS, UL_DEPR, []
645
+ ],
646
+ ['var', false, false, false, false, false, :any, true,
647
+ 'instance of a variable or program argument',
648
+ HTML_INLINE, nil, HTML_ATTRS, [], []
649
+ ]
650
+ ].each do |descriptor|
651
+ name = descriptor[0]
652
+
653
+ begin
654
+ d = Desc.new(*descriptor)
655
+
656
+ # Flatten the sub-element and attribute lists. (In Ruby 1.9, *[a,b,c] can
657
+ # be used to flatten a nested literal list inline, but not in Ruby 1.8.)
658
+ d[:subelts] = d[:subelts].flatten
659
+ d[:attrs_opt] = d[:attrs_opt].flatten
660
+ d[:attrs_depr] = d[:attrs_depr].flatten
661
+ d[:attrs_req] = d[:attrs_req].flatten
662
+ rescue => e
663
+ p name
664
+ raise e
665
+ end
666
+
667
+ DefaultDescriptions[name] = d
668
+ end
669
+ end
670
+ end
671
+ end