nokogiri 1.6.2.rc1-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.editorconfig +17 -0
  4. data/.gemtest +0 -0
  5. data/.travis.yml +25 -0
  6. data/CHANGELOG.ja.rdoc +857 -0
  7. data/CHANGELOG.rdoc +880 -0
  8. data/C_CODING_STYLE.rdoc +33 -0
  9. data/Gemfile +21 -0
  10. data/Manifest.txt +371 -0
  11. data/README.ja.rdoc +112 -0
  12. data/README.rdoc +180 -0
  13. data/ROADMAP.md +89 -0
  14. data/Rakefile +351 -0
  15. data/STANDARD_RESPONSES.md +47 -0
  16. data/Y_U_NO_GEMSPEC.md +155 -0
  17. data/bin/nokogiri +78 -0
  18. data/build_all +130 -0
  19. data/dependencies.yml +4 -0
  20. data/ext/nokogiri/depend +358 -0
  21. data/ext/nokogiri/extconf.rb +453 -0
  22. data/ext/nokogiri/html_document.c +170 -0
  23. data/ext/nokogiri/html_document.h +10 -0
  24. data/ext/nokogiri/html_element_description.c +279 -0
  25. data/ext/nokogiri/html_element_description.h +10 -0
  26. data/ext/nokogiri/html_entity_lookup.c +32 -0
  27. data/ext/nokogiri/html_entity_lookup.h +8 -0
  28. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  29. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  30. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  31. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  32. data/ext/nokogiri/nokogiri.c +148 -0
  33. data/ext/nokogiri/nokogiri.h +164 -0
  34. data/ext/nokogiri/xml_attr.c +94 -0
  35. data/ext/nokogiri/xml_attr.h +9 -0
  36. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  37. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  38. data/ext/nokogiri/xml_cdata.c +56 -0
  39. data/ext/nokogiri/xml_cdata.h +9 -0
  40. data/ext/nokogiri/xml_comment.c +54 -0
  41. data/ext/nokogiri/xml_comment.h +9 -0
  42. data/ext/nokogiri/xml_document.c +577 -0
  43. data/ext/nokogiri/xml_document.h +23 -0
  44. data/ext/nokogiri/xml_document_fragment.c +48 -0
  45. data/ext/nokogiri/xml_document_fragment.h +10 -0
  46. data/ext/nokogiri/xml_dtd.c +202 -0
  47. data/ext/nokogiri/xml_dtd.h +10 -0
  48. data/ext/nokogiri/xml_element_content.c +123 -0
  49. data/ext/nokogiri/xml_element_content.h +10 -0
  50. data/ext/nokogiri/xml_element_decl.c +69 -0
  51. data/ext/nokogiri/xml_element_decl.h +9 -0
  52. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  53. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  54. data/ext/nokogiri/xml_entity_decl.c +110 -0
  55. data/ext/nokogiri/xml_entity_decl.h +10 -0
  56. data/ext/nokogiri/xml_entity_reference.c +52 -0
  57. data/ext/nokogiri/xml_entity_reference.h +9 -0
  58. data/ext/nokogiri/xml_io.c +56 -0
  59. data/ext/nokogiri/xml_io.h +11 -0
  60. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  61. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  62. data/ext/nokogiri/xml_namespace.c +78 -0
  63. data/ext/nokogiri/xml_namespace.h +13 -0
  64. data/ext/nokogiri/xml_node.c +1541 -0
  65. data/ext/nokogiri/xml_node.h +13 -0
  66. data/ext/nokogiri/xml_node_set.c +467 -0
  67. data/ext/nokogiri/xml_node_set.h +14 -0
  68. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  69. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  70. data/ext/nokogiri/xml_reader.c +681 -0
  71. data/ext/nokogiri/xml_reader.h +10 -0
  72. data/ext/nokogiri/xml_relax_ng.c +161 -0
  73. data/ext/nokogiri/xml_relax_ng.h +9 -0
  74. data/ext/nokogiri/xml_sax_parser.c +312 -0
  75. data/ext/nokogiri/xml_sax_parser.h +39 -0
  76. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  77. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  78. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  79. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  80. data/ext/nokogiri/xml_schema.c +205 -0
  81. data/ext/nokogiri/xml_schema.h +9 -0
  82. data/ext/nokogiri/xml_syntax_error.c +63 -0
  83. data/ext/nokogiri/xml_syntax_error.h +13 -0
  84. data/ext/nokogiri/xml_text.c +52 -0
  85. data/ext/nokogiri/xml_text.h +9 -0
  86. data/ext/nokogiri/xml_xpath_context.c +307 -0
  87. data/ext/nokogiri/xml_xpath_context.h +10 -0
  88. data/ext/nokogiri/xslt_stylesheet.c +270 -0
  89. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  90. data/lib/nokogiri.rb +137 -0
  91. data/lib/nokogiri/2.0/nokogiri.so +0 -0
  92. data/lib/nokogiri/2.1/nokogiri.so +0 -0
  93. data/lib/nokogiri/css.rb +27 -0
  94. data/lib/nokogiri/css/node.rb +52 -0
  95. data/lib/nokogiri/css/parser.rb +715 -0
  96. data/lib/nokogiri/css/parser.y +249 -0
  97. data/lib/nokogiri/css/parser_extras.rb +91 -0
  98. data/lib/nokogiri/css/syntax_error.rb +7 -0
  99. data/lib/nokogiri/css/tokenizer.rb +152 -0
  100. data/lib/nokogiri/css/tokenizer.rex +55 -0
  101. data/lib/nokogiri/css/xpath_visitor.rb +219 -0
  102. data/lib/nokogiri/decorators/slop.rb +35 -0
  103. data/lib/nokogiri/html.rb +37 -0
  104. data/lib/nokogiri/html/builder.rb +35 -0
  105. data/lib/nokogiri/html/document.rb +333 -0
  106. data/lib/nokogiri/html/document_fragment.rb +41 -0
  107. data/lib/nokogiri/html/element_description.rb +23 -0
  108. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  109. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  110. data/lib/nokogiri/html/sax/parser.rb +52 -0
  111. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  112. data/lib/nokogiri/html/sax/push_parser.rb +16 -0
  113. data/lib/nokogiri/syntax_error.rb +4 -0
  114. data/lib/nokogiri/version.rb +106 -0
  115. data/lib/nokogiri/xml.rb +73 -0
  116. data/lib/nokogiri/xml/attr.rb +14 -0
  117. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  118. data/lib/nokogiri/xml/builder.rb +443 -0
  119. data/lib/nokogiri/xml/cdata.rb +11 -0
  120. data/lib/nokogiri/xml/character_data.rb +7 -0
  121. data/lib/nokogiri/xml/document.rb +279 -0
  122. data/lib/nokogiri/xml/document_fragment.rb +112 -0
  123. data/lib/nokogiri/xml/dtd.rb +32 -0
  124. data/lib/nokogiri/xml/element_content.rb +36 -0
  125. data/lib/nokogiri/xml/element_decl.rb +13 -0
  126. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  127. data/lib/nokogiri/xml/namespace.rb +13 -0
  128. data/lib/nokogiri/xml/node.rb +982 -0
  129. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  130. data/lib/nokogiri/xml/node_set.rb +355 -0
  131. data/lib/nokogiri/xml/notation.rb +6 -0
  132. data/lib/nokogiri/xml/parse_options.rb +98 -0
  133. data/lib/nokogiri/xml/pp.rb +2 -0
  134. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  135. data/lib/nokogiri/xml/pp/node.rb +56 -0
  136. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  137. data/lib/nokogiri/xml/reader.rb +112 -0
  138. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  139. data/lib/nokogiri/xml/sax.rb +4 -0
  140. data/lib/nokogiri/xml/sax/document.rb +171 -0
  141. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  142. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  143. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  144. data/lib/nokogiri/xml/schema.rb +63 -0
  145. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  146. data/lib/nokogiri/xml/text.rb +9 -0
  147. data/lib/nokogiri/xml/xpath.rb +10 -0
  148. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  149. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  150. data/lib/nokogiri/xslt.rb +56 -0
  151. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/suppressions/README.txt +1 -0
  154. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  155. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  156. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  157. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  158. data/tasks/nokogiri.org.rb +24 -0
  159. data/tasks/test.rb +95 -0
  160. data/test/css/test_nthiness.rb +222 -0
  161. data/test/css/test_parser.rb +358 -0
  162. data/test/css/test_tokenizer.rb +198 -0
  163. data/test/css/test_xpath_visitor.rb +96 -0
  164. data/test/decorators/test_slop.rb +16 -0
  165. data/test/files/2ch.html +108 -0
  166. data/test/files/address_book.rlx +12 -0
  167. data/test/files/address_book.xml +10 -0
  168. data/test/files/atom.xml +344 -0
  169. data/test/files/bar/bar.xsd +4 -0
  170. data/test/files/bogus.xml +0 -0
  171. data/test/files/dont_hurt_em_why.xml +422 -0
  172. data/test/files/encoding.html +82 -0
  173. data/test/files/encoding.xhtml +84 -0
  174. data/test/files/exslt.xml +8 -0
  175. data/test/files/exslt.xslt +35 -0
  176. data/test/files/foo/foo.xsd +4 -0
  177. data/test/files/metacharset.html +10 -0
  178. data/test/files/noencoding.html +47 -0
  179. data/test/files/po.xml +32 -0
  180. data/test/files/po.xsd +66 -0
  181. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  182. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  183. data/test/files/saml/xenc_schema.xsd +146 -0
  184. data/test/files/saml/xmldsig_schema.xsd +318 -0
  185. data/test/files/shift_jis.html +10 -0
  186. data/test/files/shift_jis.xml +5 -0
  187. data/test/files/shift_jis_no_charset.html +9 -0
  188. data/test/files/snuggles.xml +3 -0
  189. data/test/files/staff.dtd +10 -0
  190. data/test/files/staff.xml +59 -0
  191. data/test/files/staff.xslt +32 -0
  192. data/test/files/test_document_url/bar.xml +2 -0
  193. data/test/files/test_document_url/document.dtd +4 -0
  194. data/test/files/test_document_url/document.xml +6 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/files/to_be_xincluded.xml +2 -0
  197. data/test/files/valid_bar.xml +2 -0
  198. data/test/files/xinclude.xml +4 -0
  199. data/test/helper.rb +164 -0
  200. data/test/html/sax/test_parser.rb +141 -0
  201. data/test/html/sax/test_parser_context.rb +46 -0
  202. data/test/html/test_builder.rb +164 -0
  203. data/test/html/test_document.rb +619 -0
  204. data/test/html/test_document_encoding.rb +148 -0
  205. data/test/html/test_document_fragment.rb +261 -0
  206. data/test/html/test_element_description.rb +105 -0
  207. data/test/html/test_named_characters.rb +14 -0
  208. data/test/html/test_node.rb +196 -0
  209. data/test/html/test_node_encoding.rb +27 -0
  210. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  211. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  212. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  213. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  214. data/test/namespaces/test_namespaces_in_parsed_doc.rb +66 -0
  215. data/test/test_convert_xpath.rb +135 -0
  216. data/test/test_css_cache.rb +45 -0
  217. data/test/test_encoding_handler.rb +46 -0
  218. data/test/test_memory_leak.rb +156 -0
  219. data/test/test_nokogiri.rb +138 -0
  220. data/test/test_reader.rb +558 -0
  221. data/test/test_soap4r_sax.rb +52 -0
  222. data/test/test_xslt_transforms.rb +279 -0
  223. data/test/xml/node/test_save_options.rb +28 -0
  224. data/test/xml/node/test_subclass.rb +44 -0
  225. data/test/xml/sax/test_parser.rb +382 -0
  226. data/test/xml/sax/test_parser_context.rb +115 -0
  227. data/test/xml/sax/test_push_parser.rb +157 -0
  228. data/test/xml/test_attr.rb +64 -0
  229. data/test/xml/test_attribute_decl.rb +86 -0
  230. data/test/xml/test_builder.rb +315 -0
  231. data/test/xml/test_c14n.rb +161 -0
  232. data/test/xml/test_cdata.rb +48 -0
  233. data/test/xml/test_comment.rb +29 -0
  234. data/test/xml/test_document.rb +934 -0
  235. data/test/xml/test_document_encoding.rb +28 -0
  236. data/test/xml/test_document_fragment.rb +228 -0
  237. data/test/xml/test_dtd.rb +187 -0
  238. data/test/xml/test_dtd_encoding.rb +33 -0
  239. data/test/xml/test_element_content.rb +56 -0
  240. data/test/xml/test_element_decl.rb +73 -0
  241. data/test/xml/test_entity_decl.rb +122 -0
  242. data/test/xml/test_entity_reference.rb +245 -0
  243. data/test/xml/test_namespace.rb +95 -0
  244. data/test/xml/test_node.rb +1155 -0
  245. data/test/xml/test_node_attributes.rb +113 -0
  246. data/test/xml/test_node_encoding.rb +107 -0
  247. data/test/xml/test_node_inheritance.rb +32 -0
  248. data/test/xml/test_node_reparenting.rb +374 -0
  249. data/test/xml/test_node_set.rb +755 -0
  250. data/test/xml/test_parse_options.rb +64 -0
  251. data/test/xml/test_processing_instruction.rb +30 -0
  252. data/test/xml/test_reader_encoding.rb +142 -0
  253. data/test/xml/test_relax_ng.rb +60 -0
  254. data/test/xml/test_schema.rb +129 -0
  255. data/test/xml/test_syntax_error.rb +12 -0
  256. data/test/xml/test_text.rb +45 -0
  257. data/test/xml/test_unparented_node.rb +422 -0
  258. data/test/xml/test_xinclude.rb +83 -0
  259. data/test/xml/test_xpath.rb +376 -0
  260. data/test/xslt/test_custom_functions.rb +133 -0
  261. data/test/xslt/test_exception_handling.rb +37 -0
  262. data/test_all +81 -0
  263. metadata +601 -0
@@ -0,0 +1,55 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class Tokenizer # :nodoc:
4
+
5
+ macro
6
+ nl \n|\r\n|\r|\f
7
+ w [\s]*
8
+ nonascii [^\0-\177]
9
+ num -?([0-9]+|[0-9]*\.[0-9]+)
10
+ unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
11
+
12
+ escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
13
+ nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
14
+ nmstart [_A-Za-z]|{nonascii}|{escape}
15
+ ident [-@]?({nmstart})({nmchar})*
16
+ name ({nmchar})+
17
+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*"
18
+ string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*'
19
+ string {string1}|{string2}
20
+
21
+ rule
22
+
23
+ # [:state] pattern [actions]
24
+
25
+ has\({w} { [:HAS, text] }
26
+ {ident}\({w} { [:FUNCTION, text] }
27
+ {ident} { [:IDENT, text] }
28
+ \#{name} { [:HASH, text] }
29
+ {w}~={w} { [:INCLUDES, text] }
30
+ {w}\|={w} { [:DASHMATCH, text] }
31
+ {w}\^={w} { [:PREFIXMATCH, text] }
32
+ {w}\$={w} { [:SUFFIXMATCH, text] }
33
+ {w}\*={w} { [:SUBSTRINGMATCH, text] }
34
+ {w}!={w} { [:NOT_EQUAL, text] }
35
+ {w}={w} { [:EQUAL, text] }
36
+ {w}\) { [:RPAREN, text] }
37
+ {w}\[{w} { [:LSQUARE, text] }
38
+ {w}\] { [:RSQUARE, text] }
39
+ {w}\+{w} { [:PLUS, text] }
40
+ {w}>{w} { [:GREATER, text] }
41
+ {w},{w} { [:COMMA, text] }
42
+ {w}~{w} { [:TILDE, text] }
43
+ \:not\({w} { [:NOT, text] }
44
+ {num} { [:NUMBER, text] }
45
+ {w}\/\/{w} { [:DOUBLESLASH, text] }
46
+ {w}\/{w} { [:SLASH, text] }
47
+
48
+ U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
49
+
50
+ [\s]+ { [:S, text] }
51
+ {string} { [:STRING, text] }
52
+ . { [text, text] }
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,219 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class XPathVisitor # :nodoc:
4
+ def visit_function node
5
+
6
+ msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
+ return self.send(msg, node) if self.respond_to?(msg)
8
+
9
+ case node.value.first
10
+ when /^text\(/
11
+ 'child::text()'
12
+ when /^self\(/
13
+ "self::#{node.value[1]}"
14
+ when /^eq\(/
15
+ "position() = #{node.value[1]}"
16
+ when /^(nth|nth-of-type)\(/
17
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
18
+ nth(node.value[1])
19
+ else
20
+ "position() = #{node.value[1]}"
21
+ end
22
+ when /^nth-child\(/
23
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
+ nth(node.value[1], :child => true)
25
+ else
26
+ "count(preceding-sibling::*) = #{node.value[1].to_i-1}"
27
+ end
28
+ when /^nth-last-of-type\(/
29
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
+ nth(node.value[1], :last => true)
31
+ else
32
+ index = node.value[1].to_i - 1
33
+ index == 0 ? "position() = last()" : "position() = last() - #{index}"
34
+ end
35
+ when /^nth-last-child\(/
36
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
+ nth(node.value[1], :last => true, :child => true)
38
+ else
39
+ "count(following-sbiling::*) = #{node.value[1].to_i-1}"
40
+ end
41
+ when /^(first|first-of-type)\(/
42
+ "position() = 1"
43
+ when /^(last|last-of-type)\(/
44
+ "position() = last()"
45
+ when /^contains\(/
46
+ "contains(., #{node.value[1]})"
47
+ when /^gt\(/
48
+ "position() > #{node.value[1]}"
49
+ when /^only-child\(/
50
+ "last() = 1"
51
+ when /^comment\(/
52
+ "comment()"
53
+ when /^has\(/
54
+ node.value[1].accept(self)
55
+ else
56
+ args = ['.'] + node.value[1..-1]
57
+ "#{node.value.first}#{args.join(', ')})"
58
+ end
59
+ end
60
+
61
+ def visit_not node
62
+ child = node.value.first
63
+ if :ELEMENT_NAME == child.type
64
+ "not(self::#{child.accept(self)})"
65
+ else
66
+ "not(#{child.accept(self)})"
67
+ end
68
+ end
69
+
70
+ def visit_id node
71
+ node.value.first =~ /^#(.*)$/
72
+ "@id = '#{$1}'"
73
+ end
74
+
75
+ def visit_attribute_condition node
76
+ attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
77
+ ''
78
+ else
79
+ '@'
80
+ end
81
+ attribute += node.value.first.accept(self)
82
+
83
+ # Support non-standard css
84
+ attribute.gsub!(/^@@/, '@')
85
+
86
+ return attribute unless node.value.length == 3
87
+
88
+ value = node.value.last
89
+ value = "'#{value}'" if value !~ /^['"]/
90
+
91
+ case node.value[1]
92
+ when :equal
93
+ attribute + " = " + "#{value}"
94
+ when :not_equal
95
+ attribute + " != " + "#{value}"
96
+ when :substring_match
97
+ "contains(#{attribute}, #{value})"
98
+ when :prefix_match
99
+ "starts-with(#{attribute}, #{value})"
100
+ when :dash_match
101
+ "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
102
+ when :includes
103
+ "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
104
+ when :suffix_match
105
+ "substring(#{attribute}, string-length(#{attribute}) - " +
106
+ "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
107
+ else
108
+ attribute + " #{node.value[1]} " + "#{value}"
109
+ end
110
+ end
111
+
112
+ def visit_pseudo_class node
113
+ if node.value.first.is_a?(Nokogiri::CSS::Node) and node.value.first.type == :FUNCTION
114
+ node.value.first.accept(self)
115
+ else
116
+ msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, '')}"
117
+ return self.send(msg, node) if self.respond_to?(msg)
118
+
119
+ case node.value.first
120
+ when "first" then "position() = 1"
121
+ when "first-child" then "count(preceding-sibling::*) = 0"
122
+ when "last" then "position() = last()"
123
+ when "last-child" then "count(following-sibling::*) = 0"
124
+ when "first-of-type" then "position() = 1"
125
+ when "last-of-type" then "position() = last()"
126
+ when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
127
+ when "only-of-type" then "last() = 1"
128
+ when "empty" then "not(node())"
129
+ when "parent" then "node()"
130
+ when "root" then "not(parent::*)"
131
+ else
132
+ node.value.first + "(.)"
133
+ end
134
+ end
135
+ end
136
+
137
+ def visit_class_condition node
138
+ "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
139
+ end
140
+
141
+ def visit_combinator node
142
+ if is_of_type_pseudo_class?(node.value.last)
143
+ "#{node.value.first.accept(self) if node.value.first}][#{node.value.last.accept(self)}"
144
+ else
145
+ "#{node.value.first.accept(self) if node.value.first} and #{node.value.last.accept(self)}"
146
+ end
147
+ end
148
+
149
+ {
150
+ 'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
151
+ 'following_selector' => "/following-sibling::",
152
+ 'descendant_selector' => '//',
153
+ 'child_selector' => '/',
154
+ }.each do |k,v|
155
+ class_eval %{
156
+ def visit_#{k} node
157
+ "\#{node.value.first.accept(self) if node.value.first}#{v}\#{node.value.last.accept(self)}"
158
+ end
159
+ }
160
+ end
161
+
162
+ def visit_conditional_selector node
163
+ node.value.first.accept(self) + '[' +
164
+ node.value.last.accept(self) + ']'
165
+ end
166
+
167
+ def visit_element_name node
168
+ node.value.first
169
+ end
170
+
171
+ def accept node
172
+ node.accept(self)
173
+ end
174
+
175
+ private
176
+ def nth node, options={}
177
+ raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
178
+
179
+ a, b = read_a_and_positive_b node.value
180
+ position = if options[:child]
181
+ options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
182
+ else
183
+ options[:last] ? "(last()-position()+1)" : "position()"
184
+ end
185
+
186
+ if (b == 0)
187
+ return "(#{position} mod #{a}) = 0"
188
+ else
189
+ compare = (a < 0) ? "<=" : ">="
190
+ return "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
191
+ end
192
+ end
193
+
194
+ def read_a_and_positive_b values
195
+ op = values[2]
196
+ if op == "+"
197
+ a = values[0].to_i
198
+ b = values[3].to_i
199
+ elsif op == "-"
200
+ a = values[0].to_i
201
+ b = a - (values[3].to_i % a)
202
+ else
203
+ raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
204
+ end
205
+ [a, b]
206
+ end
207
+
208
+ def is_of_type_pseudo_class? node
209
+ if node.type==:PSEUDO_CLASS
210
+ if node.value[0].is_a?(Nokogiri::CSS::Node) and node.value[0].type == :FUNCTION
211
+ node.value[0].value[0]
212
+ else
213
+ node.value[0]
214
+ end =~ /(nth|first|last|only)-of-type(\()?/
215
+ end
216
+ end
217
+ end
218
+ end
219
+ end
@@ -0,0 +1,35 @@
1
+ module Nokogiri
2
+ module Decorators
3
+ ###
4
+ # The Slop decorator implements method_missing such that methods may be
5
+ # used instead of XPath or CSS. See Nokogiri.Slop
6
+ module Slop
7
+ ###
8
+ # look for node with +name+. See Nokogiri.Slop
9
+ def method_missing name, *args, &block
10
+ prefix = implied_xpath_context
11
+
12
+ if args.empty?
13
+ list = xpath("#{prefix}#{name.to_s.sub(/^_/, '')}")
14
+ elsif args.first.is_a? Hash
15
+ hash = args.first
16
+ if hash[:css]
17
+ list = css("#{name}#{hash[:css]}")
18
+ elsif hash[:xpath]
19
+ conds = Array(hash[:xpath]).join(' and ')
20
+ list = xpath("#{prefix}#{name}[#{conds}]")
21
+ end
22
+ else
23
+ CSS::Parser.without_cache do
24
+ list = xpath(
25
+ *CSS.xpath_for("#{name}#{args.first}", :prefix => prefix)
26
+ )
27
+ end
28
+ end
29
+
30
+ super if list.empty?
31
+ list.length == 1 ? list.first : list
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,37 @@
1
+ require 'nokogiri/html/entity_lookup'
2
+ require 'nokogiri/html/document'
3
+ require 'nokogiri/html/document_fragment'
4
+ require 'nokogiri/html/sax/parser_context'
5
+ require 'nokogiri/html/sax/parser'
6
+ require 'nokogiri/html/sax/push_parser'
7
+ require 'nokogiri/html/element_description'
8
+ require 'nokogiri/html/element_description_defaults'
9
+
10
+ module Nokogiri
11
+ class << self
12
+ ###
13
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
14
+ def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
15
+ Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
16
+ end
17
+ end
18
+
19
+ module HTML
20
+ class << self
21
+ ###
22
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
23
+ def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
24
+ Document.parse(thing, url, encoding, options, &block)
25
+ end
26
+
27
+ ####
28
+ # Parse a fragment from +string+ into a NodeSet.
29
+ def fragment string, encoding = nil
30
+ HTML::DocumentFragment.parse string, encoding
31
+ end
32
+ end
33
+
34
+ # Instance of Nokogiri::HTML::EntityLookup
35
+ NamedCharacters = EntityLookup.new
36
+ end
37
+ end
@@ -0,0 +1,35 @@
1
+ module Nokogiri
2
+ module HTML
3
+ ###
4
+ # Nokogiri HTML builder is used for building HTML documents. It is very
5
+ # similar to the Nokogiri::XML::Builder. In fact, you should go read the
6
+ # documentation for Nokogiri::XML::Builder before reading this
7
+ # documentation.
8
+ #
9
+ # == Synopsis:
10
+ #
11
+ # Create an HTML document with a body that has an onload attribute, and a
12
+ # span tag with a class of "bold" that has content of "Hello world".
13
+ #
14
+ # builder = Nokogiri::HTML::Builder.new do |doc|
15
+ # doc.html {
16
+ # doc.body(:onload => 'some_func();') {
17
+ # doc.span.bold {
18
+ # doc.text "Hello world"
19
+ # }
20
+ # }
21
+ # }
22
+ # end
23
+ # puts builder.to_html
24
+ #
25
+ # The HTML builder inherits from the XML builder, so make sure to read the
26
+ # Nokogiri::XML::Builder documentation.
27
+ class Builder < Nokogiri::XML::Builder
28
+ ###
29
+ # Convert the builder to HTML
30
+ def to_html
31
+ @doc.to_html
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,333 @@
1
+ module Nokogiri
2
+ module HTML
3
+ class Document < Nokogiri::XML::Document
4
+ ###
5
+ # Get the meta tag encoding for this document. If there is no meta tag,
6
+ # then nil is returned.
7
+ def meta_encoding
8
+ case
9
+ when meta = at('//meta[@charset]')
10
+ meta[:charset]
11
+ when meta = meta_content_type
12
+ meta['content'][/charset\s*=\s*([\w-]+)/i, 1]
13
+ end
14
+ end
15
+
16
+ ###
17
+ # Set the meta tag encoding for this document.
18
+ #
19
+ # If a meta encoding tag is already present, its content is
20
+ # replaced with the given text.
21
+ #
22
+ # Otherwise, this method tries to create one at an appropriate
23
+ # place supplying head and/or html elements as necessary, which
24
+ # is inside a head element if any, and before any text node or
25
+ # content element (typically <body>) if any.
26
+ #
27
+ # The result when trying to set an encoding that is different
28
+ # from the document encoding is undefined.
29
+ #
30
+ # Beware in CRuby, that libxml2 automatically inserts a meta tag
31
+ # into a head element.
32
+ def meta_encoding= encoding
33
+ case
34
+ when meta = meta_content_type
35
+ meta['content'] = 'text/html; charset=%s' % encoding
36
+ encoding
37
+ when meta = at('//meta[@charset]')
38
+ meta['charset'] = encoding
39
+ else
40
+ meta = XML::Node.new('meta', self)
41
+ if dtd = internal_subset and dtd.html5_dtd?
42
+ meta['charset'] = encoding
43
+ else
44
+ meta['http-equiv'] = 'Content-Type'
45
+ meta['content'] = 'text/html; charset=%s' % encoding
46
+ end
47
+
48
+ case
49
+ when head = at('//head')
50
+ head.prepend_child(meta)
51
+ else
52
+ set_metadata_element(meta)
53
+ end
54
+ encoding
55
+ end
56
+ end
57
+
58
+ def meta_content_type
59
+ xpath('//meta[@http-equiv and boolean(@content)]').find { |node|
60
+ node['http-equiv'] =~ /\AContent-Type\z/i
61
+ }
62
+ end
63
+ private :meta_content_type
64
+
65
+ ###
66
+ # Get the title string of this document. Return nil if there is
67
+ # no title tag.
68
+ def title
69
+ title = at('//title') and title.inner_text
70
+ end
71
+
72
+ ###
73
+ # Set the title string of this document.
74
+ #
75
+ # If a title element is already present, its content is replaced
76
+ # with the given text.
77
+ #
78
+ # Otherwise, this method tries to create one at an appropriate
79
+ # place supplying head and/or html elements as necessary, which
80
+ # is inside a head element if any, right after a meta
81
+ # encoding/charset tag if any, and before any text node or
82
+ # content element (typically <body>) if any.
83
+ def title=(text)
84
+ tnode = XML::Text.new(text, self)
85
+ if title = at('//title')
86
+ title.children = tnode
87
+ return text
88
+ end
89
+
90
+ title = XML::Node.new('title', self) << tnode
91
+ case
92
+ when head = at('//head')
93
+ head << title
94
+ when meta = at('//meta[@charset]') || meta_content_type
95
+ # better put after charset declaration
96
+ meta.add_next_sibling(title)
97
+ else
98
+ set_metadata_element(title)
99
+ end
100
+ text
101
+ end
102
+
103
+ def set_metadata_element(element)
104
+ case
105
+ when head = at('//head')
106
+ head << element
107
+ when html = at('//html')
108
+ head = html.prepend_child(XML::Node.new('head', self))
109
+ head.prepend_child(element)
110
+ when first = children.find { |node|
111
+ case node
112
+ when XML::Element, XML::Text
113
+ true
114
+ end
115
+ }
116
+ # We reach here only if the underlying document model
117
+ # allows <html>/<head> elements to be omitted and does not
118
+ # automatically supply them.
119
+ first.add_previous_sibling(element)
120
+ else
121
+ html = add_child(XML::Node.new('html', self))
122
+ head = html.add_child(XML::Node.new('head', self))
123
+ head.prepend_child(element)
124
+ end
125
+ end
126
+ private :set_metadata_element
127
+
128
+ ####
129
+ # Serialize Node using +options+. Save options can also be set using a
130
+ # block. See SaveOptions.
131
+ #
132
+ # These two statements are equivalent:
133
+ #
134
+ # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
135
+ #
136
+ # or
137
+ #
138
+ # node.serialize(:encoding => 'UTF-8') do |config|
139
+ # config.format.as_xml
140
+ # end
141
+ #
142
+ def serialize options = {}
143
+ options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
144
+ super
145
+ end
146
+
147
+ ####
148
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
149
+ def fragment tags = nil
150
+ DocumentFragment.new(self, tags, self.root)
151
+ end
152
+
153
+ class << self
154
+ ###
155
+ # Parse HTML. +string_or_io+ may be a String, or any object that
156
+ # responds to _read_ and _close_ such as an IO, or StringIO.
157
+ # +url+ is the resource where this document is located. +encoding+ is the
158
+ # encoding that should be used when processing the document. +options+
159
+ # is a number that sets options in the parser, such as
160
+ # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
161
+ # Nokogiri::XML::ParseOptions.
162
+ def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
163
+
164
+ options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
165
+ # Give the options to the user
166
+ yield options if block_given?
167
+
168
+ if string_or_io.respond_to?(:encoding)
169
+ unless string_or_io.encoding.name == "ASCII-8BIT"
170
+ encoding ||= string_or_io.encoding.name
171
+ end
172
+ end
173
+
174
+ if string_or_io.respond_to?(:read)
175
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
176
+ if !encoding
177
+ # Libxml2's parser has poor support for encoding
178
+ # detection. First, it does not recognize the HTML5
179
+ # style meta charset declaration. Secondly, even if it
180
+ # successfully detects an encoding hint, it does not
181
+ # re-decode or re-parse the preceding part which may be
182
+ # garbled.
183
+ #
184
+ # EncodingReader aims to perform advanced encoding
185
+ # detection beyond what Libxml2 does, and to emulate
186
+ # rewinding of a stream and make Libxml2 redo parsing
187
+ # from the start when an encoding hint is found.
188
+ string_or_io = EncodingReader.new(string_or_io)
189
+ begin
190
+ return read_io(string_or_io, url, encoding, options.to_i)
191
+ rescue EncodingFound => e
192
+ encoding = e.found_encoding
193
+ end
194
+ end
195
+ return read_io(string_or_io, url, encoding, options.to_i)
196
+ end
197
+
198
+ # read_memory pukes on empty docs
199
+ return new if string_or_io.nil? or string_or_io.empty?
200
+
201
+ encoding ||= EncodingReader.detect_encoding(string_or_io)
202
+
203
+ read_memory(string_or_io, url, encoding, options.to_i)
204
+ end
205
+ end
206
+
207
+ class EncodingFound < StandardError # :nodoc:
208
+ attr_reader :found_encoding
209
+
210
+ def initialize(encoding)
211
+ @found_encoding = encoding
212
+ super("encoding found: %s" % encoding)
213
+ end
214
+ end
215
+
216
+ class EncodingReader # :nodoc:
217
+ class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
218
+ attr_reader :encoding
219
+
220
+ def initialize
221
+ @encoding = nil
222
+ super()
223
+ end
224
+
225
+ def start_element(name, attrs = [])
226
+ return unless name == 'meta'
227
+ attr = Hash[attrs]
228
+ charset = attr['charset'] and
229
+ @encoding = charset
230
+ http_equiv = attr['http-equiv'] and
231
+ http_equiv.match(/\AContent-Type\z/i) and
232
+ content = attr['content'] and
233
+ m = content.match(/;\s*charset\s*=\s*([\w-]+)/) and
234
+ @encoding = m[1]
235
+ end
236
+ end
237
+
238
+ class JumpSAXHandler < SAXHandler
239
+ def initialize(jumptag)
240
+ @jumptag = jumptag
241
+ super()
242
+ end
243
+
244
+ def start_element(name, attrs = [])
245
+ super
246
+ throw @jumptag, @encoding if @encoding
247
+ throw @jumptag, nil if name =~ /\A(?:div|h1|img|p|br)\z/
248
+ end
249
+ end
250
+
251
+ def self.detect_encoding(chunk)
252
+ if Nokogiri.jruby? && EncodingReader.is_jruby_without_fix?
253
+ return EncodingReader.detect_encoding_for_jruby_without_fix(chunk)
254
+ end
255
+ m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
256
+ return Nokogiri.XML(m[1]).encoding
257
+
258
+ if Nokogiri.jruby?
259
+ m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
260
+ return m[4]
261
+ catch(:encoding_found) {
262
+ Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
263
+ nil
264
+ }
265
+ else
266
+ handler = SAXHandler.new
267
+ parser = Nokogiri::HTML::SAX::PushParser.new(handler)
268
+ parser << chunk rescue Nokogiri::SyntaxError
269
+ handler.encoding
270
+ end
271
+ end
272
+
273
+ def self.is_jruby_without_fix?
274
+ JRUBY_VERSION.split('.').join.to_i < 165
275
+ end
276
+
277
+ def self.detect_encoding_for_jruby_without_fix(chunk)
278
+ m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
279
+ return Nokogiri.XML(m[1]).encoding
280
+
281
+ m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
282
+ return m[4]
283
+
284
+ catch(:encoding_found) {
285
+ Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found.to_s)).parse(chunk)
286
+ nil
287
+ }
288
+ rescue Nokogiri::SyntaxError, RuntimeError
289
+ # Ignore parser errors that nokogiri may raise
290
+ nil
291
+ end
292
+
293
+ def initialize(io)
294
+ @io = io
295
+ @firstchunk = nil
296
+ @encoding_found = nil
297
+ end
298
+
299
+ # This method is used by the C extension so that
300
+ # Nokogiri::HTML::Document#read_io() does not leak memory when
301
+ # EncodingFound is raised.
302
+ attr_reader :encoding_found
303
+
304
+ def read(len)
305
+ # no support for a call without len
306
+
307
+ if !@firstchunk
308
+ @firstchunk = @io.read(len) or return nil
309
+
310
+ # This implementation expects that the first call from
311
+ # htmlReadIO() is made with a length long enough (~1KB) to
312
+ # achieve advanced encoding detection.
313
+ if encoding = EncodingReader.detect_encoding(@firstchunk)
314
+ # The first chunk is stored for the next read in retry.
315
+ raise @encoding_found = EncodingFound.new(encoding)
316
+ end
317
+ end
318
+ @encoding_found = nil
319
+
320
+ ret = @firstchunk.slice!(0, len)
321
+ if (len -= ret.length) > 0
322
+ rest = @io.read(len) and ret << rest
323
+ end
324
+ if ret.empty?
325
+ nil
326
+ else
327
+ ret
328
+ end
329
+ end
330
+ end
331
+ end
332
+ end
333
+ end