nokogiri-maglev- 1.5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. data/.autotest +26 -0
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.ja.rdoc +544 -0
  4. data/CHANGELOG.rdoc +532 -0
  5. data/Manifest.txt +283 -0
  6. data/README.ja.rdoc +106 -0
  7. data/README.rdoc +174 -0
  8. data/Rakefile +171 -0
  9. data/bin/nokogiri +53 -0
  10. data/ext/nokogiri/depend +358 -0
  11. data/ext/nokogiri/extconf.rb +124 -0
  12. data/ext/nokogiri/html_document.c +154 -0
  13. data/ext/nokogiri/html_document.h +10 -0
  14. data/ext/nokogiri/html_element_description.c +276 -0
  15. data/ext/nokogiri/html_element_description.h +10 -0
  16. data/ext/nokogiri/html_entity_lookup.c +32 -0
  17. data/ext/nokogiri/html_entity_lookup.h +8 -0
  18. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  19. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  20. data/ext/nokogiri/nokogiri.c +115 -0
  21. data/ext/nokogiri/nokogiri.h +160 -0
  22. data/ext/nokogiri/st.c +576 -0
  23. data/ext/nokogiri/xml_attr.c +94 -0
  24. data/ext/nokogiri/xml_attr.h +9 -0
  25. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  26. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  27. data/ext/nokogiri/xml_cdata.c +56 -0
  28. data/ext/nokogiri/xml_cdata.h +9 -0
  29. data/ext/nokogiri/xml_comment.c +54 -0
  30. data/ext/nokogiri/xml_comment.h +9 -0
  31. data/ext/nokogiri/xml_document.c +478 -0
  32. data/ext/nokogiri/xml_document.h +23 -0
  33. data/ext/nokogiri/xml_document_fragment.c +48 -0
  34. data/ext/nokogiri/xml_document_fragment.h +10 -0
  35. data/ext/nokogiri/xml_dtd.c +202 -0
  36. data/ext/nokogiri/xml_dtd.h +10 -0
  37. data/ext/nokogiri/xml_element_content.c +123 -0
  38. data/ext/nokogiri/xml_element_content.h +10 -0
  39. data/ext/nokogiri/xml_element_decl.c +69 -0
  40. data/ext/nokogiri/xml_element_decl.h +9 -0
  41. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  42. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  43. data/ext/nokogiri/xml_entity_decl.c +110 -0
  44. data/ext/nokogiri/xml_entity_decl.h +10 -0
  45. data/ext/nokogiri/xml_entity_reference.c +52 -0
  46. data/ext/nokogiri/xml_entity_reference.h +9 -0
  47. data/ext/nokogiri/xml_io.c +56 -0
  48. data/ext/nokogiri/xml_io.h +11 -0
  49. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  50. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  51. data/ext/nokogiri/xml_namespace.c +84 -0
  52. data/ext/nokogiri/xml_namespace.h +13 -0
  53. data/ext/nokogiri/xml_node.c +1397 -0
  54. data/ext/nokogiri/xml_node.h +13 -0
  55. data/ext/nokogiri/xml_node_set.c +418 -0
  56. data/ext/nokogiri/xml_node_set.h +9 -0
  57. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  58. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  59. data/ext/nokogiri/xml_reader.c +684 -0
  60. data/ext/nokogiri/xml_reader.h +10 -0
  61. data/ext/nokogiri/xml_relax_ng.c +162 -0
  62. data/ext/nokogiri/xml_relax_ng.h +9 -0
  63. data/ext/nokogiri/xml_sax_parser.c +293 -0
  64. data/ext/nokogiri/xml_sax_parser.h +39 -0
  65. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  66. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  67. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  68. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  69. data/ext/nokogiri/xml_schema.c +205 -0
  70. data/ext/nokogiri/xml_schema.h +9 -0
  71. data/ext/nokogiri/xml_syntax_error.c +58 -0
  72. data/ext/nokogiri/xml_syntax_error.h +13 -0
  73. data/ext/nokogiri/xml_text.c +50 -0
  74. data/ext/nokogiri/xml_text.h +9 -0
  75. data/ext/nokogiri/xml_xpath_context.c +315 -0
  76. data/ext/nokogiri/xml_xpath_context.h +9 -0
  77. data/ext/nokogiri/xslt_stylesheet.c +265 -0
  78. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  79. data/lib/nokogiri.rb +127 -0
  80. data/lib/nokogiri/css.rb +27 -0
  81. data/lib/nokogiri/css/node.rb +99 -0
  82. data/lib/nokogiri/css/parser.rb +677 -0
  83. data/lib/nokogiri/css/parser.y +237 -0
  84. data/lib/nokogiri/css/parser_extras.rb +91 -0
  85. data/lib/nokogiri/css/syntax_error.rb +7 -0
  86. data/lib/nokogiri/css/tokenizer.rb +152 -0
  87. data/lib/nokogiri/css/tokenizer.rex +55 -0
  88. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  89. data/lib/nokogiri/decorators/slop.rb +35 -0
  90. data/lib/nokogiri/html.rb +36 -0
  91. data/lib/nokogiri/html/builder.rb +35 -0
  92. data/lib/nokogiri/html/document.rb +213 -0
  93. data/lib/nokogiri/html/document_fragment.rb +41 -0
  94. data/lib/nokogiri/html/element_description.rb +23 -0
  95. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  96. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  97. data/lib/nokogiri/html/sax/parser.rb +52 -0
  98. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  99. data/lib/nokogiri/syntax_error.rb +4 -0
  100. data/lib/nokogiri/version.rb +88 -0
  101. data/lib/nokogiri/xml.rb +67 -0
  102. data/lib/nokogiri/xml/attr.rb +14 -0
  103. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  104. data/lib/nokogiri/xml/builder.rb +426 -0
  105. data/lib/nokogiri/xml/cdata.rb +11 -0
  106. data/lib/nokogiri/xml/character_data.rb +7 -0
  107. data/lib/nokogiri/xml/document.rb +234 -0
  108. data/lib/nokogiri/xml/document_fragment.rb +98 -0
  109. data/lib/nokogiri/xml/dtd.rb +22 -0
  110. data/lib/nokogiri/xml/element_content.rb +36 -0
  111. data/lib/nokogiri/xml/element_decl.rb +13 -0
  112. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  113. data/lib/nokogiri/xml/namespace.rb +13 -0
  114. data/lib/nokogiri/xml/node.rb +915 -0
  115. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  116. data/lib/nokogiri/xml/node_set.rb +357 -0
  117. data/lib/nokogiri/xml/notation.rb +6 -0
  118. data/lib/nokogiri/xml/parse_options.rb +93 -0
  119. data/lib/nokogiri/xml/pp.rb +2 -0
  120. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  121. data/lib/nokogiri/xml/pp/node.rb +56 -0
  122. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  123. data/lib/nokogiri/xml/reader.rb +112 -0
  124. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  125. data/lib/nokogiri/xml/sax.rb +4 -0
  126. data/lib/nokogiri/xml/sax/document.rb +164 -0
  127. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  128. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  129. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  130. data/lib/nokogiri/xml/schema.rb +63 -0
  131. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  132. data/lib/nokogiri/xml/text.rb +9 -0
  133. data/lib/nokogiri/xml/xpath.rb +10 -0
  134. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  135. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  136. data/lib/nokogiri/xslt.rb +52 -0
  137. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  138. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  139. data/nokogiri_help_responses.md +40 -0
  140. data/tasks/cross_compile.rb +152 -0
  141. data/tasks/nokogiri.org.rb +18 -0
  142. data/tasks/test.rb +94 -0
  143. data/test/css/test_nthiness.rb +159 -0
  144. data/test/css/test_parser.rb +303 -0
  145. data/test/css/test_tokenizer.rb +198 -0
  146. data/test/css/test_xpath_visitor.rb +85 -0
  147. data/test/decorators/test_slop.rb +16 -0
  148. data/test/files/2ch.html +108 -0
  149. data/test/files/address_book.rlx +12 -0
  150. data/test/files/address_book.xml +10 -0
  151. data/test/files/bar/bar.xsd +4 -0
  152. data/test/files/dont_hurt_em_why.xml +422 -0
  153. data/test/files/encoding.html +82 -0
  154. data/test/files/encoding.xhtml +84 -0
  155. data/test/files/exslt.xml +8 -0
  156. data/test/files/exslt.xslt +35 -0
  157. data/test/files/foo/foo.xsd +4 -0
  158. data/test/files/metacharset.html +10 -0
  159. data/test/files/noencoding.html +47 -0
  160. data/test/files/po.xml +32 -0
  161. data/test/files/po.xsd +66 -0
  162. data/test/files/shift_jis.html +10 -0
  163. data/test/files/shift_jis.xml +5 -0
  164. data/test/files/snuggles.xml +3 -0
  165. data/test/files/staff.dtd +10 -0
  166. data/test/files/staff.xml +59 -0
  167. data/test/files/staff.xslt +32 -0
  168. data/test/files/tlm.html +850 -0
  169. data/test/files/valid_bar.xml +2 -0
  170. data/test/helper.rb +173 -0
  171. data/test/html/sax/test_parser.rb +139 -0
  172. data/test/html/sax/test_parser_context.rb +48 -0
  173. data/test/html/test_builder.rb +165 -0
  174. data/test/html/test_document.rb +472 -0
  175. data/test/html/test_document_encoding.rb +138 -0
  176. data/test/html/test_document_fragment.rb +255 -0
  177. data/test/html/test_element_description.rb +101 -0
  178. data/test/html/test_named_characters.rb +14 -0
  179. data/test/html/test_node.rb +193 -0
  180. data/test/html/test_node_encoding.rb +27 -0
  181. data/test/test_convert_xpath.rb +135 -0
  182. data/test/test_css_cache.rb +45 -0
  183. data/test/test_encoding_handler.rb +46 -0
  184. data/test/test_memory_leak.rb +72 -0
  185. data/test/test_nokogiri.rb +133 -0
  186. data/test/test_reader.rb +425 -0
  187. data/test/test_soap4r_sax.rb +52 -0
  188. data/test/test_xslt_transforms.rb +193 -0
  189. data/test/xml/node/test_save_options.rb +28 -0
  190. data/test/xml/node/test_subclass.rb +44 -0
  191. data/test/xml/sax/test_parser.rb +338 -0
  192. data/test/xml/sax/test_parser_context.rb +113 -0
  193. data/test/xml/sax/test_push_parser.rb +156 -0
  194. data/test/xml/test_attr.rb +65 -0
  195. data/test/xml/test_attribute_decl.rb +86 -0
  196. data/test/xml/test_builder.rb +227 -0
  197. data/test/xml/test_cdata.rb +50 -0
  198. data/test/xml/test_comment.rb +29 -0
  199. data/test/xml/test_document.rb +697 -0
  200. data/test/xml/test_document_encoding.rb +26 -0
  201. data/test/xml/test_document_fragment.rb +192 -0
  202. data/test/xml/test_dtd.rb +107 -0
  203. data/test/xml/test_dtd_encoding.rb +33 -0
  204. data/test/xml/test_element_content.rb +56 -0
  205. data/test/xml/test_element_decl.rb +73 -0
  206. data/test/xml/test_entity_decl.rb +122 -0
  207. data/test/xml/test_entity_reference.rb +21 -0
  208. data/test/xml/test_namespace.rb +70 -0
  209. data/test/xml/test_node.rb +917 -0
  210. data/test/xml/test_node_attributes.rb +34 -0
  211. data/test/xml/test_node_encoding.rb +107 -0
  212. data/test/xml/test_node_reparenting.rb +334 -0
  213. data/test/xml/test_node_set.rb +742 -0
  214. data/test/xml/test_parse_options.rb +52 -0
  215. data/test/xml/test_processing_instruction.rb +30 -0
  216. data/test/xml/test_reader_encoding.rb +126 -0
  217. data/test/xml/test_relax_ng.rb +60 -0
  218. data/test/xml/test_schema.rb +94 -0
  219. data/test/xml/test_syntax_error.rb +12 -0
  220. data/test/xml/test_text.rb +47 -0
  221. data/test/xml/test_unparented_node.rb +381 -0
  222. data/test/xml/test_xpath.rb +237 -0
  223. data/test/xslt/test_custom_functions.rb +94 -0
  224. data/test/xslt/test_exception_handling.rb +37 -0
  225. metadata +548 -0
@@ -0,0 +1,55 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class Tokenizer
4
+
5
+ macro
6
+ nl \n|\r\n|\r|\f
7
+ w [\s]*
8
+ nonascii [^\0-\177]
9
+ num -?([0-9]+|[0-9]*\.[0-9]+)
10
+ unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
11
+
12
+ escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
13
+ nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
14
+ nmstart [_A-Za-z]|{nonascii}|{escape}
15
+ ident [-@]?({nmstart})({nmchar})*
16
+ name ({nmchar})+
17
+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*"
18
+ string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*'
19
+ string {string1}|{string2}
20
+
21
+ rule
22
+
23
+ # [:state] pattern [actions]
24
+
25
+ has\({w} { [:HAS, text] }
26
+ {ident}\({w} { [:FUNCTION, text] }
27
+ {ident} { [:IDENT, text] }
28
+ \#{name} { [:HASH, text] }
29
+ {w}~={w} { [:INCLUDES, text] }
30
+ {w}\|={w} { [:DASHMATCH, text] }
31
+ {w}\^={w} { [:PREFIXMATCH, text] }
32
+ {w}\$={w} { [:SUFFIXMATCH, text] }
33
+ {w}\*={w} { [:SUBSTRINGMATCH, text] }
34
+ {w}!={w} { [:NOT_EQUAL, text] }
35
+ {w}={w} { [:EQUAL, text] }
36
+ {w}\) { [:RPAREN, text] }
37
+ {w}\[{w} { [:LSQUARE, text] }
38
+ {w}\] { [:RSQUARE, text] }
39
+ {w}\+{w} { [:PLUS, text] }
40
+ {w}>{w} { [:GREATER, text] }
41
+ {w},{w} { [:COMMA, text] }
42
+ {w}~{w} { [:TILDE, text] }
43
+ \:not\({w} { [:NOT, text] }
44
+ {num} { [:NUMBER, text] }
45
+ {w}\/\/{w} { [:DOUBLESLASH, text] }
46
+ {w}\/{w} { [:SLASH, text] }
47
+
48
+ U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
49
+
50
+ [\s]+ { [:S, text] }
51
+ {string} { [:STRING, text] }
52
+ . { [text, text] }
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,171 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class XPathVisitor # :nodoc:
4
# Translate a CSS function node into an XPath fragment (a String).
#
# node.value.first is the function name including its opening paren
# (for example "eq("); node.value[1] is the first argument when present.
# When this visitor responds to visit_function_<name>, that method is
# used instead of the built-in translations below.
def visit_function node
  # note that nth-child and nth-last-child are preprocessed in css/node.rb.
  fn_name = node.value.first
  handler = "visit_function_#{fn_name.gsub(/[(]/, '')}".to_sym
  return send(handler, node) if respond_to?(handler)

  arg = node.value[1]
  # Evaluated lazily so the argument is only inspected by the nth-* branches.
  formula = lambda { arg.is_a?(Nokogiri::CSS::Node) && arg.type == :AN_PLUS_B }

  case fn_name
  when /^text\(/ then 'child::text()'
  when /^self\(/ then "self::#{arg}"
  when /^eq\(/ then "position() = #{arg}"
  when /^(nth|nth-of-type|nth-child)\(/
    formula.call ? an_plus_b(arg) : "position() = #{arg}"
  when /^(nth-last-child|nth-last-of-type)\(/
    if formula.call
      an_plus_b(arg, :last => true)
    else
      # Counting from the end: 1 means the last node, 2 the one before it...
      offset = arg.to_i - 1
      offset == 0 ? "position() = last()" : "position() = last() - #{offset}"
    end
  when /^(first|first-of-type)\(/ then "position() = 1"
  when /^(last|last-of-type)\(/ then "position() = last()"
  when /^contains\(/ then "contains(., #{arg})"
  when /^gt\(/ then "position() > #{arg}"
  when /^only-child\(/ then "last() = 1"
  when /^comment\(/ then "comment()"
  when /^has\(/ then arg.accept(self)
  else
    # Unknown function: pass it through with the context node prepended
    # to its argument list.
    call_args = ['.'] + node.value[1..-1]
    "#{fn_name}#{call_args.join(', ')})"
  end
end
48
+
49
# Build the XPath for a :not(...) selector.
#
# The negated child is node.value.first. When that child is an element
# name it is matched through the self:: axis; any other child is visited
# and wrapped in not(...) directly.
def visit_not node
  negated = node.value.first
  axis = (:ELEMENT_NAME == negated.type) ? 'self::' : ''
  "not(#{axis}#{negated.accept(self)})"
end
57
+
58
# Build the XPath test for an id selector.
#
# node.value.first is the raw token (for example "#main"); everything
# after the leading "#" becomes the id compared against @id. A token
# without a leading "#" yields an empty id.
def visit_id node
  id = node.value.first[/^#(.*)$/, 1]
  "@id = '#{id}'"
end
62
+
63
# Build the XPath test for an attribute selector such as [href],
# [type = "text"] or [class ~= "foo"].
#
# node.value is either [target] or [target, operator, operand]. The target
# is visited to obtain the attribute expression; it is prefixed with "@"
# unless it is a function or already contains an axis ("::").
def visit_attribute_condition node
  target = node.value.first
  bare = (target.type == :FUNCTION) || (target.value.first =~ /::/)
  attribute = (bare ? '' : '@') + target.accept(self)

  # Support non-standard css
  attribute.gsub!(/^@@/, '@')

  # A bare [attr] selector only tests for the attribute's presence.
  return attribute unless node.value.length == 3

  operator = node.value[1]
  value = node.value.last
  # Quote the operand unless the selector already supplied quotes.
  value = "'#{value}'" unless value =~ /^['"]/

  case operator
  when :equal then "#{attribute} = #{value}"
  when :not_equal then "#{attribute} != #{value}"
  when :substring_match then "contains(#{attribute}, #{value})"
  when :prefix_match then "starts-with(#{attribute}, #{value})"
  when :dash_match
    "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
  when :includes
    # Pad both sides with spaces so only whole words match.
    %(contains(concat(" ", #{attribute}, " "),concat(" ", #{value}, " ")))
  when :suffix_match
    # XPath 1.0 has no ends-with(), so compare the trailing substring.
    value_length = "string-length(#{value})"
    "substring(#{attribute}, string-length(#{attribute}) - #{value_length} + 1, #{value_length}) = #{value}"
  else
    "#{attribute} #{operator} #{value}"
  end
end
99
+
100
# Build the XPath test for a pseudo-class such as :first-child or :empty.
#
# node.value.first is either a FUNCTION node (visited as-is) or the
# pseudo-class name. When this visitor responds to
# visit_pseudo_class_<name>, that method is used; otherwise well-known
# names map to fixed XPath tests and anything else becomes a call to an
# XPath function of the same name on the context node.
def visit_pseudo_class node
  pseudo = node.value.first
  if pseudo.is_a?(Nokogiri::CSS::Node) && pseudo.type == :FUNCTION
    return pseudo.accept(self)
  end

  handler = "visit_pseudo_class_#{pseudo.gsub(/[(]/, '')}".to_sym
  return send(handler, node) if respond_to?(handler)

  known = {
    "first"         => "position() = 1",
    "first-child"   => "position() = 1",
    "last"          => "position() = last()",
    "last-child"    => "position() = last()",
    "first-of-type" => "position() = 1",
    "last-of-type"  => "position() = last()",
    "only-of-type"  => "last() = 1",
    "empty"         => "not(node())",
    "parent"        => "node()",
    "root"          => "not(parent::*)",
  }
  known.fetch(pseudo) { pseudo + "(.)" }
end
121
+
122
+ def visit_class_condition node
123
+ "contains(concat(' ', @class, ' '), ' #{node.value.first} ')"
124
+ end
125
+
126
+ {
127
+ 'combinator' => ' and ',
128
+ 'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
129
+ 'preceding_selector' => "/following-sibling::",
130
+ 'descendant_selector' => '//',
131
+ 'child_selector' => '/',
132
+ }.each do |k,v|
133
+ class_eval %{
134
+ def visit_#{k} node
135
+ "\#{node.value.first.accept(self)}#{v}\#{node.value.last.accept(self)}"
136
+ end
137
+ }
138
+ end
139
+
140
# Build the XPath for a selector with a condition: the first child is
# visited to produce the step and the last child is visited to produce
# the predicate placed inside the square brackets.
def visit_conditional_selector node
  step = node.value.first.accept(self)
  predicate = node.value.last.accept(self)
  step + '[' + predicate + ']'
end
144
+
145
+ def visit_element_name node
146
+ node.value.first
147
+ end
148
+
149
+ def accept node
150
+ node.accept(self)
151
+ end
152
+
153
+ private
154
# Translate an "an+b" formula node into an XPath position test.
#
# node.value must hold exactly four tokens; the first is read as the step
# +a+ and the fourth as the offset +b+ (both via to_i). With
# options[:last] set, positions are counted from the end of the node list.
#
# Raises ArgumentError when node.value does not contain four tokens.
def an_plus_b node, options={}
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4

  a = node.value[0].to_i
  b = node.value[3].to_i
  position = options[:last] ? "(last()-position()+1)" : "position()"

  # A zero step selects only the b-th node. Emitting "mod 0" here would
  # yield NaN in XPath, which never equals 0, so nothing would match.
  return "#{position} = #{b}" if a == 0

  if b == 0
    "(#{position} mod #{a}) = 0"
  else
    # A negative step counts down from b, so only positions up to b match.
    compare = (a < 0) ? "<=" : ">="
    "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
  end
end
168
+
169
+ end
170
+ end
171
+ end
@@ -0,0 +1,35 @@
1
+ module Nokogiri
2
+ module Decorators
3
+ ###
4
+ # The Slop decorator implements method_missing so that methods may be
5
+ # used instead of XPath or CSS. See Nokogiri.Slop
6
+ module Slop
7
+ ###
8
+ # look for node with +name+. See Nokogiri.Slop
9
# Resolve an unknown method call as a search for nodes named +name+.
#
# * no arguments: XPath search for +name+ (one leading underscore is
#   stripped from the name) below the implied XPath context
# * a Hash with :css: CSS search for +name+ followed by the given suffix
# * a Hash with :xpath: XPath search with the condition(s) and-ed together
# * any other argument: treated as a CSS suffix appended to +name+
#
# Returns the single matching node when exactly one is found, otherwise
# the whole result list. Falls back to the default method_missing (and so
# raises NoMethodError) when nothing matches.
def method_missing name, *args, &block
  prefix = implied_xpath_context
  list = nil

  if args.empty?
    list = xpath("#{prefix}#{name.to_s.sub(/^_/, '')}")
  elsif args.first.is_a? Hash
    hash = args.first
    if hash[:css]
      list = css("#{name}#{hash[:css]}")
    elsif hash[:xpath]
      conds = Array(hash[:xpath]).join(' and ')
      list = xpath("#{prefix}#{name}[#{conds}]")
    end
  else
    CSS::Parser.without_cache do
      list = xpath(
        *CSS.xpath_for("#{name}#{args.first}", :prefix => prefix)
      )
    end
  end

  # A Hash carrying neither :css nor :xpath leaves list unset; treat that
  # like "nothing found" instead of failing with NoMethodError on nil.
  super if list.nil? || list.empty?
  list.length == 1 ? list.first : list
end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,36 @@
1
+ require 'nokogiri/html/entity_lookup'
2
+ require 'nokogiri/html/document'
3
+ require 'nokogiri/html/document_fragment'
4
+ require 'nokogiri/html/sax/parser_context'
5
+ require 'nokogiri/html/sax/parser'
6
+ require 'nokogiri/html/element_description'
7
+ require 'nokogiri/html/element_description_defaults'
8
+
9
+ module Nokogiri
10
+ class << self
11
+ ###
12
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
13
+ def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
14
+ Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
15
+ end
16
+ end
17
+
18
+ module HTML
19
+ class << self
20
+ ###
21
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
22
+ def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
23
+ Document.parse(thing, url, encoding, options, &block)
24
+ end
25
+
26
+ ####
27
+ # Parse a fragment from +string+ into a NodeSet.
28
+ def fragment string, encoding = nil
29
+ HTML::DocumentFragment.parse string, encoding
30
+ end
31
+ end
32
+
33
+ # Instance of Nokogiri::HTML::EntityLookup
34
+ NamedCharacters = EntityLookup.new
35
+ end
36
+ end
@@ -0,0 +1,35 @@
1
+ module Nokogiri
2
+ module HTML
3
+ ###
4
+ # Nokogiri HTML builder is used for building HTML documents. It is very
5
+ # similar to the Nokogiri::XML::Builder. In fact, you should go read the
6
+ # documentation for Nokogiri::XML::Builder before reading this
7
+ # documentation.
8
+ #
9
+ # == Synopsis:
10
+ #
11
+ # Create an HTML document with a body that has an onload attribute, and a
12
+ # span tag with a class of "bold" that has content of "Hello world".
13
+ #
14
+ # builder = Nokogiri::HTML::Builder.new do |doc|
15
+ # doc.html {
16
+ # doc.body(:onload => 'some_func();') {
17
+ # doc.span.bold {
18
+ # doc.text "Hello world"
19
+ # }
20
+ # }
21
+ # }
22
+ # end
23
+ # puts builder.to_html
24
+ #
25
+ # The HTML builder inherits from the XML builder, so make sure to read the
26
+ # Nokogiri::XML::Builder documentation.
27
+ class Builder < Nokogiri::XML::Builder
28
+ ###
29
+ # Convert the builder to HTML
30
+ def to_html
31
+ @doc.to_html
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,213 @@
1
+ module Nokogiri
2
+ module HTML
3
+ class Document < Nokogiri::XML::Document
4
+ ###
5
+ # Get the meta tag encoding for this document. If there is no meta tag,
6
+ # then nil is returned.
7
# Read the charset declared by the Content-Type meta tag.
#
# Returns the charset name as a String, or nil when there is no such meta
# tag, the tag has no content attribute, or the content carries no
# "charset=" part.
def meta_encoding
  meta = meta_content_type or return nil

  # match returns nil when no charset is declared, so guard before
  # indexing into the result.
  found = /charset\s*=\s*([\w-]+)/i.match(meta['content'].to_s)
  found && found[1]
end
11
+
12
+ ###
13
+ # Set the meta tag encoding for this document. If there is no meta
14
+ # content tag, the encoding is not set.
15
# Rewrite the content attribute of the Content-Type meta tag so that it
# declares +encoding+. Does nothing when the document has no such tag.
def meta_encoding= encoding
  tag = meta_content_type
  return tag unless tag

  tag['content'] = "text/html; charset=%s" % encoding
end
19
+
20
# Find the first meta tag whose http-equiv attribute is "Content-Type"
# (compared case-insensitively). Returns nil when there is none.
def meta_content_type
  candidates = css('meta[@http-equiv]')
  candidates.detect do |tag|
    tag['http-equiv'] =~ /\AContent-Type\z/i
  end
end
25
+ private :meta_content_type
26
+
27
+ ###
28
+ # Get the title string of this document. Return nil if there is
29
+ # no title tag.
30
# Text of the first title element found by at('title'), or nil when the
# document has no title element.
def title
  node = at('title')
  node && node.inner_text
end
33
+
34
+ ###
35
+ # Set the title string of this document. If there is no head
36
+ # element, the title is not set.
37
# Replace the children of the title element with a single text node
# holding +text+.
#
# When there is no title element, one is created and appended to the head
# element; when there is no head element either, nothing is changed and
# nil is returned.
def title=(text)
  node = at('title')
  unless node
    head = at('head')
    return nil unless head

    node = Nokogiri::XML::Node.new('title', self)
    head << node
  end
  node.children = XML::Text.new(text, self)
end
45
+
46
+ ####
47
+ # Serialize Node using +options+. Save options can also be set using a
48
+ # block. See SaveOptions.
49
+ #
50
+ # These two statements are equivalent:
51
+ #
52
+ # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
53
+ #
54
+ # or
55
+ #
56
+ # node.serialize(:encoding => 'UTF-8') do |config|
57
+ # config.format.as_xml
58
+ # end
59
+ #
60
+ def serialize options = {}
61
+ options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
62
+ super
63
+ end
64
+
65
+ ####
66
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
67
+ def fragment tags = nil
68
+ DocumentFragment.new(self, tags, self.root)
69
+ end
70
+
71
+ class << self
72
+ ###
73
+ # Parse HTML. +string_or_io+ may be a String, or any object that
74
+ # responds to _read_ and _close_ such as an IO, or StringIO.
75
+ # +url+ is the resource where this document is located. +encoding+ is the
76
+ # encoding that should be used when processing the document. +options+
77
+ # is a number that sets options in the parser, such as
78
+ # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
79
+ # Nokogiri::XML::ParseOptions.
80
# Parse +string_or_io+ into a document.
#
# An integer +options+ value is wrapped in Nokogiri::XML::ParseOptions and
# yielded to the block, if one is given, before parsing starts. When no
# +encoding+ is supplied it is taken from the input's own encoding (unless
# that is ASCII-8BIT) or detected through EncodingReader.
#
# Inputs responding to +read+ go through read_io, everything else through
# read_memory; nil or empty input yields a new, empty document.
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
  # Integer rather than Fixnum: the Fixnum constant is deprecated since
  # Ruby 2.4 and no longer defined on current Rubies.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  # Give the options to the user
  yield options if block_given?

  if string_or_io.respond_to?(:encoding)
    unless string_or_io.encoding.name == "ASCII-8BIT"
      encoding ||= string_or_io.encoding.name
    end
  end

  if string_or_io.respond_to?(:read)
    url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
    if !encoding
      # Perform advanced encoding detection that libxml2 does
      # not do.
      string_or_io = EncodingReader.new(string_or_io)
      begin
        return read_io(string_or_io, url, encoding, options.to_i)
      rescue EncodingFoundException => e
        # A retry is required because libxml2 has a problem in
        # that it cannot switch encoding well in the middle of
        # parsing, especially if it has already seen a
        # non-ASCII character when it finds an encoding hint.
        encoding = e.encoding
      end
    end
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory pukes on empty docs
  return new if string_or_io.nil? or string_or_io.empty?

  if !encoding
    encoding = EncodingReader.detect_encoding(string_or_io)
  end

  read_memory(string_or_io, url, encoding, options.to_i)
end
120
+ end
121
+
122
# Raised to report the +encoding+ that was detected, so that the caller
# rescuing it can read the value back through #encoding.
class EncodingFoundException < Exception # :nodoc:
  def initialize(encoding)
    message = "encoding found: %s" % encoding
    super(message)
    @encoding = encoding
  end

  # The encoding that was detected.
  def encoding
    @encoding
  end
end
130
+
131
+ class EncodingReader # :nodoc:
132
+ class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
133
+ attr_reader :encoding
134
+
135
+ def found(encoding)
136
+ @encoding = encoding
137
+ throw :found
138
+ end
139
+
140
+ def not_found(encoding)
141
+ found nil
142
+ end
143
+
144
+ def start_element(name, attrs = [])
145
+ case name
146
+ when /\A(?:div|h1|img|p|br)\z/
147
+ not_found
148
+ when 'meta'
149
+ attr = Hash[attrs]
150
+ charset = attr['charset'] and
151
+ found charset
152
+ http_equiv = attr['http-equiv'] and
153
+ http_equiv.match(/\AContent-Type\z/i) and
154
+ content = attr['content'] and
155
+ m = content.match(/;\s*charset\s*=\s*([\w-]+)/) and
156
+ found m[1]
157
+ end
158
+ end
159
+ end
160
+
161
+ def self.detect_encoding(chunk)
162
+ m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
163
+ return Nokogiri.XML(m[1]).encoding
164
+
165
+ if Nokogiri.jruby?
166
+ m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
167
+ return m[4]
168
+ end
169
+
170
+ handler = SAXHandler.new
171
+ parser = Nokogiri::HTML::SAX::Parser.new(handler)
172
+ catch(:found) {
173
+ parser.parse(chunk)
174
+ }
175
+ handler.encoding
176
+ rescue
177
+ nil
178
+ end
179
+
180
+ def initialize(io)
181
+ @io = io
182
+ @firstchunk = nil
183
+ end
184
+
185
# Read up to +len+ bytes, buffering the first chunk for encoding detection.
#
# The first call reads +len+ bytes from the wrapped IO and runs
# EncodingReader.detect_encoding on them. If an encoding is found,
# EncodingFoundException is raised and the chunk stays buffered, so the
# next call hands it out again. Returns nil when no data is left.
#
# no support for a call without len
def read(len)
  unless @firstchunk
    @firstchunk = @io.read(len)
    return nil unless @firstchunk

    # This implementation expects that the first call from
    # htmlReadIO() is made with a length long enough (~1KB) to
    # achieve advanced encoding detection.
    detected = EncodingReader.detect_encoding(@firstchunk)
    # The first chunk is stored for the next read in retry.
    raise EncodingFoundException, detected if detected
  end

  # Serve buffered bytes first, then top up from the underlying IO.
  buffer = @firstchunk.slice!(0, len)
  missing = len - buffer.length
  if missing > 0
    tail = @io.read(missing)
    buffer << tail if tail
  end

  buffer.empty? ? nil : buffer
end
210
+ end
211
+ end
212
+ end
213
+ end