nokogiri-backupify 1.5.0.beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. data/.autotest +26 -0
  2. data/CHANGELOG.ja.rdoc +509 -0
  3. data/CHANGELOG.rdoc +490 -0
  4. data/Manifest.txt +274 -0
  5. data/README.ja.rdoc +106 -0
  6. data/README.rdoc +150 -0
  7. data/Rakefile +217 -0
  8. data/bin/nokogiri +54 -0
  9. data/deps.rip +5 -0
  10. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  11. data/ext/java/nokogiri/HtmlDocument.java +146 -0
  12. data/ext/java/nokogiri/HtmlElementDescription.java +145 -0
  13. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  14. data/ext/java/nokogiri/HtmlSaxParserContext.java +256 -0
  15. data/ext/java/nokogiri/NokogiriService.java +466 -0
  16. data/ext/java/nokogiri/XmlAttr.java +183 -0
  17. data/ext/java/nokogiri/XmlAttributeDecl.java +130 -0
  18. data/ext/java/nokogiri/XmlCdata.java +89 -0
  19. data/ext/java/nokogiri/XmlComment.java +84 -0
  20. data/ext/java/nokogiri/XmlDocument.java +514 -0
  21. data/ext/java/nokogiri/XmlDocumentFragment.java +216 -0
  22. data/ext/java/nokogiri/XmlDtd.java +464 -0
  23. data/ext/java/nokogiri/XmlElement.java +221 -0
  24. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  25. data/ext/java/nokogiri/XmlElementDecl.java +147 -0
  26. data/ext/java/nokogiri/XmlEntityDecl.java +161 -0
  27. data/ext/java/nokogiri/XmlEntityReference.java +75 -0
  28. data/ext/java/nokogiri/XmlNamespace.java +127 -0
  29. data/ext/java/nokogiri/XmlNode.java +1392 -0
  30. data/ext/java/nokogiri/XmlNodeSet.java +284 -0
  31. data/ext/java/nokogiri/XmlProcessingInstruction.java +103 -0
  32. data/ext/java/nokogiri/XmlReader.java +409 -0
  33. data/ext/java/nokogiri/XmlRelaxng.java +199 -0
  34. data/ext/java/nokogiri/XmlSaxParserContext.java +353 -0
  35. data/ext/java/nokogiri/XmlSaxPushParser.java +182 -0
  36. data/ext/java/nokogiri/XmlSchema.java +175 -0
  37. data/ext/java/nokogiri/XmlSyntaxError.java +114 -0
  38. data/ext/java/nokogiri/XmlText.java +135 -0
  39. data/ext/java/nokogiri/XmlXpathContext.java +175 -0
  40. data/ext/java/nokogiri/XsltStylesheet.java +181 -0
  41. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +205 -0
  42. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +73 -0
  43. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +80 -0
  44. data/ext/java/nokogiri/internals/NokogiriHandler.java +326 -0
  45. data/ext/java/nokogiri/internals/NokogiriHelpers.java +583 -0
  46. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +170 -0
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +118 -0
  48. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +73 -0
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  50. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
  51. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +120 -0
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +56 -0
  53. data/ext/java/nokogiri/internals/ParserContext.java +278 -0
  54. data/ext/java/nokogiri/internals/PushInputStream.java +411 -0
  55. data/ext/java/nokogiri/internals/ReaderNode.java +473 -0
  56. data/ext/java/nokogiri/internals/SaveContext.java +282 -0
  57. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +68 -0
  58. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  59. data/ext/java/nokogiri/internals/XmlDomParser.java +77 -0
  60. data/ext/java/nokogiri/internals/XmlDomParserContext.java +233 -0
  61. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  62. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
  63. data/ext/nokogiri/depend +358 -0
  64. data/ext/nokogiri/extconf.rb +124 -0
  65. data/ext/nokogiri/html_document.c +154 -0
  66. data/ext/nokogiri/html_document.h +10 -0
  67. data/ext/nokogiri/html_element_description.c +276 -0
  68. data/ext/nokogiri/html_element_description.h +10 -0
  69. data/ext/nokogiri/html_entity_lookup.c +32 -0
  70. data/ext/nokogiri/html_entity_lookup.h +8 -0
  71. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  72. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  73. data/ext/nokogiri/nokogiri.c +92 -0
  74. data/ext/nokogiri/nokogiri.h +160 -0
  75. data/ext/nokogiri/xml_attr.c +94 -0
  76. data/ext/nokogiri/xml_attr.h +9 -0
  77. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  78. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  79. data/ext/nokogiri/xml_cdata.c +56 -0
  80. data/ext/nokogiri/xml_cdata.h +9 -0
  81. data/ext/nokogiri/xml_comment.c +54 -0
  82. data/ext/nokogiri/xml_comment.h +9 -0
  83. data/ext/nokogiri/xml_document.c +478 -0
  84. data/ext/nokogiri/xml_document.h +23 -0
  85. data/ext/nokogiri/xml_document_fragment.c +48 -0
  86. data/ext/nokogiri/xml_document_fragment.h +10 -0
  87. data/ext/nokogiri/xml_dtd.c +202 -0
  88. data/ext/nokogiri/xml_dtd.h +10 -0
  89. data/ext/nokogiri/xml_element_content.c +123 -0
  90. data/ext/nokogiri/xml_element_content.h +10 -0
  91. data/ext/nokogiri/xml_element_decl.c +69 -0
  92. data/ext/nokogiri/xml_element_decl.h +9 -0
  93. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  94. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  95. data/ext/nokogiri/xml_entity_decl.c +110 -0
  96. data/ext/nokogiri/xml_entity_decl.h +10 -0
  97. data/ext/nokogiri/xml_entity_reference.c +52 -0
  98. data/ext/nokogiri/xml_entity_reference.h +9 -0
  99. data/ext/nokogiri/xml_io.c +31 -0
  100. data/ext/nokogiri/xml_io.h +11 -0
  101. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  102. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  103. data/ext/nokogiri/xml_namespace.c +84 -0
  104. data/ext/nokogiri/xml_namespace.h +13 -0
  105. data/ext/nokogiri/xml_node.c +1384 -0
  106. data/ext/nokogiri/xml_node.h +13 -0
  107. data/ext/nokogiri/xml_node_set.c +418 -0
  108. data/ext/nokogiri/xml_node_set.h +9 -0
  109. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  110. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  111. data/ext/nokogiri/xml_reader.c +684 -0
  112. data/ext/nokogiri/xml_reader.h +10 -0
  113. data/ext/nokogiri/xml_relax_ng.c +161 -0
  114. data/ext/nokogiri/xml_relax_ng.h +9 -0
  115. data/ext/nokogiri/xml_sax_parser.c +288 -0
  116. data/ext/nokogiri/xml_sax_parser.h +39 -0
  117. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  118. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  119. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  120. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  121. data/ext/nokogiri/xml_schema.c +205 -0
  122. data/ext/nokogiri/xml_schema.h +9 -0
  123. data/ext/nokogiri/xml_syntax_error.c +58 -0
  124. data/ext/nokogiri/xml_syntax_error.h +13 -0
  125. data/ext/nokogiri/xml_text.c +50 -0
  126. data/ext/nokogiri/xml_text.h +9 -0
  127. data/ext/nokogiri/xml_xpath_context.c +309 -0
  128. data/ext/nokogiri/xml_xpath_context.h +9 -0
  129. data/ext/nokogiri/xslt_stylesheet.c +258 -0
  130. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  131. data/lib/isorelax.jar +0 -0
  132. data/lib/jing.jar +0 -0
  133. data/lib/nekodtd.jar +0 -0
  134. data/lib/nekohtml.jar +0 -0
  135. data/lib/nokogiri.rb +143 -0
  136. data/lib/nokogiri/css.rb +23 -0
  137. data/lib/nokogiri/css/node.rb +99 -0
  138. data/lib/nokogiri/css/parser.rb +677 -0
  139. data/lib/nokogiri/css/parser.y +237 -0
  140. data/lib/nokogiri/css/parser_extras.rb +91 -0
  141. data/lib/nokogiri/css/syntax_error.rb +7 -0
  142. data/lib/nokogiri/css/tokenizer.rb +152 -0
  143. data/lib/nokogiri/css/tokenizer.rex +55 -0
  144. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  145. data/lib/nokogiri/decorators/slop.rb +35 -0
  146. data/lib/nokogiri/html.rb +36 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +221 -0
  149. data/lib/nokogiri/html/document_fragment.rb +41 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  152. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  153. data/lib/nokogiri/html/sax/parser.rb +52 -0
  154. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  155. data/lib/nokogiri/syntax_error.rb +4 -0
  156. data/lib/nokogiri/version.rb +35 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +418 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +218 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +84 -0
  165. data/lib/nokogiri/xml/dtd.rb +22 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  169. data/lib/nokogiri/xml/namespace.rb +13 -0
  170. data/lib/nokogiri/xml/node.rb +907 -0
  171. data/lib/nokogiri/xml/node/save_options.rb +45 -0
  172. data/lib/nokogiri/xml/node_set.rb +350 -0
  173. data/lib/nokogiri/xml/notation.rb +6 -0
  174. data/lib/nokogiri/xml/parse_options.rb +85 -0
  175. data/lib/nokogiri/xml/pp.rb +2 -0
  176. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  177. data/lib/nokogiri/xml/pp/node.rb +56 -0
  178. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  179. data/lib/nokogiri/xml/reader.rb +112 -0
  180. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  181. data/lib/nokogiri/xml/sax.rb +4 -0
  182. data/lib/nokogiri/xml/sax/document.rb +164 -0
  183. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  184. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  185. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  186. data/lib/nokogiri/xml/schema.rb +57 -0
  187. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  188. data/lib/nokogiri/xml/text.rb +9 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +52 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xercesImpl.jar +0 -0
  195. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  196. data/tasks/cross_compile.rb +177 -0
  197. data/tasks/test.rb +94 -0
  198. data/test/css/test_nthiness.rb +159 -0
  199. data/test/css/test_parser.rb +303 -0
  200. data/test/css/test_tokenizer.rb +198 -0
  201. data/test/css/test_xpath_visitor.rb +85 -0
  202. data/test/decorators/test_slop.rb +16 -0
  203. data/test/files/2ch.html +108 -0
  204. data/test/files/address_book.rlx +12 -0
  205. data/test/files/address_book.xml +10 -0
  206. data/test/files/bar/bar.xsd +4 -0
  207. data/test/files/dont_hurt_em_why.xml +422 -0
  208. data/test/files/exslt.xml +8 -0
  209. data/test/files/exslt.xslt +35 -0
  210. data/test/files/foo/foo.xsd +4 -0
  211. data/test/files/po.xml +32 -0
  212. data/test/files/po.xsd +66 -0
  213. data/test/files/shift_jis.html +10 -0
  214. data/test/files/shift_jis.xml +5 -0
  215. data/test/files/snuggles.xml +3 -0
  216. data/test/files/staff.dtd +10 -0
  217. data/test/files/staff.xml +59 -0
  218. data/test/files/staff.xslt +32 -0
  219. data/test/files/tlm.html +850 -0
  220. data/test/files/valid_bar.xml +2 -0
  221. data/test/helper.rb +171 -0
  222. data/test/html/sax/test_parser.rb +136 -0
  223. data/test/html/sax/test_parser_context.rb +48 -0
  224. data/test/html/test_builder.rb +164 -0
  225. data/test/html/test_document.rb +457 -0
  226. data/test/html/test_document_encoding.rb +123 -0
  227. data/test/html/test_document_fragment.rb +255 -0
  228. data/test/html/test_element_description.rb +100 -0
  229. data/test/html/test_named_characters.rb +14 -0
  230. data/test/html/test_node.rb +190 -0
  231. data/test/html/test_node_encoding.rb +27 -0
  232. data/test/test_convert_xpath.rb +135 -0
  233. data/test/test_css_cache.rb +45 -0
  234. data/test/test_encoding_handler.rb +46 -0
  235. data/test/test_memory_leak.rb +52 -0
  236. data/test/test_nokogiri.rb +132 -0
  237. data/test/test_reader.rb +403 -0
  238. data/test/test_soap4r_sax.rb +52 -0
  239. data/test/test_xslt_transforms.rb +189 -0
  240. data/test/xml/node/test_save_options.rb +20 -0
  241. data/test/xml/node/test_subclass.rb +44 -0
  242. data/test/xml/sax/test_parser.rb +338 -0
  243. data/test/xml/sax/test_parser_context.rb +113 -0
  244. data/test/xml/sax/test_push_parser.rb +156 -0
  245. data/test/xml/test_attr.rb +65 -0
  246. data/test/xml/test_attribute_decl.rb +86 -0
  247. data/test/xml/test_builder.rb +210 -0
  248. data/test/xml/test_cdata.rb +50 -0
  249. data/test/xml/test_comment.rb +29 -0
  250. data/test/xml/test_document.rb +675 -0
  251. data/test/xml/test_document_encoding.rb +26 -0
  252. data/test/xml/test_document_fragment.rb +192 -0
  253. data/test/xml/test_dtd.rb +107 -0
  254. data/test/xml/test_dtd_encoding.rb +33 -0
  255. data/test/xml/test_element_content.rb +56 -0
  256. data/test/xml/test_element_decl.rb +73 -0
  257. data/test/xml/test_entity_decl.rb +122 -0
  258. data/test/xml/test_entity_reference.rb +21 -0
  259. data/test/xml/test_namespace.rb +70 -0
  260. data/test/xml/test_node.rb +899 -0
  261. data/test/xml/test_node_attributes.rb +34 -0
  262. data/test/xml/test_node_encoding.rb +107 -0
  263. data/test/xml/test_node_reparenting.rb +321 -0
  264. data/test/xml/test_node_set.rb +708 -0
  265. data/test/xml/test_parse_options.rb +52 -0
  266. data/test/xml/test_processing_instruction.rb +30 -0
  267. data/test/xml/test_reader_encoding.rb +126 -0
  268. data/test/xml/test_relax_ng.rb +60 -0
  269. data/test/xml/test_schema.rb +89 -0
  270. data/test/xml/test_syntax_error.rb +12 -0
  271. data/test/xml/test_text.rb +47 -0
  272. data/test/xml/test_unparented_node.rb +381 -0
  273. data/test/xml/test_xpath.rb +237 -0
  274. data/test/xslt/test_custom_functions.rb +94 -0
  275. metadata +525 -0
@@ -0,0 +1,55 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class Tokenizer
4
+
5
+ macro
6
+ nl \n|\r\n|\r|\f
7
+ w [\s]*
8
+ nonascii [^\0-\177]
9
+ num -?([0-9]+|[0-9]*\.[0-9]+)
10
+ unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
11
+
12
+ escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
13
+ nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
14
+ nmstart [_A-Za-z]|{nonascii}|{escape}
15
+ ident [-@]?({nmstart})({nmchar})*
16
+ name ({nmchar})+
17
+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*"
18
+ string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*'
19
+ string {string1}|{string2}
20
+
21
+ rule
22
+
23
+ # [:state] pattern [actions]
24
+
25
+ has\({w} { [:HAS, text] }
26
+ {ident}\({w} { [:FUNCTION, text] }
27
+ {ident} { [:IDENT, text] }
28
+ \#{name} { [:HASH, text] }
29
+ {w}~={w} { [:INCLUDES, text] }
30
+ {w}\|={w} { [:DASHMATCH, text] }
31
+ {w}\^={w} { [:PREFIXMATCH, text] }
32
+ {w}\$={w} { [:SUFFIXMATCH, text] }
33
+ {w}\*={w} { [:SUBSTRINGMATCH, text] }
34
+ {w}!={w} { [:NOT_EQUAL, text] }
35
+ {w}={w} { [:EQUAL, text] }
36
+ {w}\) { [:RPAREN, text] }
37
+ {w}\[{w} { [:LSQUARE, text] }
38
+ {w}\] { [:RSQUARE, text] }
39
+ {w}\+{w} { [:PLUS, text] }
40
+ {w}>{w} { [:GREATER, text] }
41
+ {w},{w} { [:COMMA, text] }
42
+ {w}~{w} { [:TILDE, text] }
43
+ \:not\({w} { [:NOT, text] }
44
+ {num} { [:NUMBER, text] }
45
+ {w}\/\/{w} { [:DOUBLESLASH, text] }
46
+ {w}\/{w} { [:SLASH, text] }
47
+
48
+ U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
49
+
50
+ [\s]+ { [:S, text] }
51
+ {string} { [:STRING, text] }
52
+ . { [text, text] }
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,171 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class XPathVisitor # :nodoc:
4
+ def visit_function node
5
+ # note that nth-child and nth-last-child are preprocessed in css/node.rb.
6
+ msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
+ return self.send(msg, node) if self.respond_to?(msg)
8
+
9
+ case node.value.first
10
+ when /^text\(/
11
+ 'child::text()'
12
+ when /^self\(/
13
+ "self::#{node.value[1]}"
14
+ when /^eq\(/
15
+ "position() = #{node.value[1]}"
16
+ when /^(nth|nth-of-type|nth-child)\(/
17
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
18
+ an_plus_b(node.value[1])
19
+ else
20
+ "position() = #{node.value[1]}"
21
+ end
22
+ when /^(nth-last-child|nth-last-of-type)\(/
23
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
24
+ an_plus_b(node.value[1], :last => true)
25
+ else
26
+ index = node.value[1].to_i - 1
27
+ index == 0 ? "position() = last()" : "position() = last() - #{index}"
28
+ end
29
+ when /^(first|first-of-type)\(/
30
+ "position() = 1"
31
+ when /^(last|last-of-type)\(/
32
+ "position() = last()"
33
+ when /^contains\(/
34
+ "contains(., #{node.value[1]})"
35
+ when /^gt\(/
36
+ "position() > #{node.value[1]}"
37
+ when /^only-child\(/
38
+ "last() = 1"
39
+ when /^comment\(/
40
+ "comment()"
41
+ when /^has\(/
42
+ node.value[1].accept(self)
43
+ else
44
+ args = ['.'] + node.value[1..-1]
45
+ "#{node.value.first}#{args.join(', ')})"
46
+ end
47
+ end
48
+
49
+ def visit_not node
50
+ child = node.value.first
51
+ if :ELEMENT_NAME == child.type
52
+ "not(self::#{child.accept(self)})"
53
+ else
54
+ "not(#{child.accept(self)})"
55
+ end
56
+ end
57
+
58
+ def visit_id node
59
+ node.value.first =~ /^#(.*)$/
60
+ "@id = '#{$1}'"
61
+ end
62
+
63
+ def visit_attribute_condition node
64
+ attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
65
+ ''
66
+ else
67
+ '@'
68
+ end
69
+ attribute += node.value.first.accept(self)
70
+
71
+ # Support non-standard css
72
+ attribute.gsub!(/^@@/, '@')
73
+
74
+ return attribute unless node.value.length == 3
75
+
76
+ value = node.value.last
77
+ value = "'#{value}'" if value !~ /^['"]/
78
+
79
+ case node.value[1]
80
+ when :equal
81
+ attribute + " = " + "#{value}"
82
+ when :not_equal
83
+ attribute + " != " + "#{value}"
84
+ when :substring_match
85
+ "contains(#{attribute}, #{value})"
86
+ when :prefix_match
87
+ "starts-with(#{attribute}, #{value})"
88
+ when :dash_match
89
+ "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
90
+ when :includes
91
+ "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
92
+ when :suffix_match
93
+ "substring(#{attribute}, string-length(#{attribute}) - " +
94
+ "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
95
+ else
96
+ attribute + " #{node.value[1]} " + "#{value}"
97
+ end
98
+ end
99
+
100
+ def visit_pseudo_class node
101
+ if node.value.first.is_a?(Nokogiri::CSS::Node) and node.value.first.type == :FUNCTION
102
+ node.value.first.accept(self)
103
+ else
104
+ msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, '')}"
105
+ return self.send(msg, node) if self.respond_to?(msg)
106
+
107
+ case node.value.first
108
+ when "first", "first-child" then "position() = 1"
109
+ when "last", "last-child" then "position() = last()"
110
+ when "first-of-type" then "position() = 1"
111
+ when "last-of-type" then "position() = last()"
112
+ when "only-of-type" then "last() = 1"
113
+ when "empty" then "not(node())"
114
+ when "parent" then "node()"
115
+ when "root" then "not(parent::*)"
116
+ else
117
+ node.value.first + "(.)"
118
+ end
119
+ end
120
+ end
121
+
122
+ def visit_class_condition node
123
+ "contains(concat(' ', @class, ' '), ' #{node.value.first} ')"
124
+ end
125
+
126
+ {
127
+ 'combinator' => ' and ',
128
+ 'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
129
+ 'preceding_selector' => "/following-sibling::",
130
+ 'descendant_selector' => '//',
131
+ 'child_selector' => '/',
132
+ }.each do |k,v|
133
+ class_eval %{
134
+ def visit_#{k} node
135
+ "\#{node.value.first.accept(self)}#{v}\#{node.value.last.accept(self)}"
136
+ end
137
+ }
138
+ end
139
+
140
+ def visit_conditional_selector node
141
+ node.value.first.accept(self) + '[' +
142
+ node.value.last.accept(self) + ']'
143
+ end
144
+
145
+ def visit_element_name node
146
+ node.value.first
147
+ end
148
+
149
+ def accept node
150
+ node.accept(self)
151
+ end
152
+
153
+ private
154
+ def an_plus_b node, options={}
155
+ raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
156
+
157
+ a = node.value[0].to_i
158
+ b = node.value[3].to_i
159
+ position = options[:last] ? "(last()-position()+1)" : "position()"
160
+
161
+ if (b == 0)
162
+ return "(#{position} mod #{a}) = 0"
163
+ else
164
+ compare = (a < 0) ? "<=" : ">="
165
+ return "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
166
+ end
167
+ end
168
+
169
+ end
170
+ end
171
+ end
@@ -0,0 +1,35 @@
1
+ module Nokogiri
2
+ module Decorators
3
+ ###
4
+ # The Slop decorator implements method_missing such that methods may be
5
+ # used instead of XPath or CSS. See Nokogiri.Slop
6
+ module Slop
7
+ ###
8
+ # look for node with +name+. See Nokogiri.Slop
9
+ def method_missing name, *args, &block
10
+ prefix = implied_xpath_context
11
+
12
+ if args.empty?
13
+ list = xpath("#{prefix}#{name.to_s.sub(/^_/, '')}")
14
+ elsif args.first.is_a? Hash
15
+ hash = args.first
16
+ if hash[:css]
17
+ list = css("#{name}#{hash[:css]}")
18
+ elsif hash[:xpath]
19
+ conds = Array(hash[:xpath]).join(' and ')
20
+ list = xpath("#{prefix}#{name}[#{conds}]")
21
+ end
22
+ else
23
+ CSS::Parser.without_cache do
24
+ list = xpath(
25
+ *CSS.xpath_for("#{name}#{args.first}", :prefix => prefix)
26
+ )
27
+ end
28
+ end
29
+
30
+ super if list.empty?
31
+ list.length == 1 ? list.first : list
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,36 @@
1
+ require 'nokogiri/html/entity_lookup'
2
+ require 'nokogiri/html/document'
3
+ require 'nokogiri/html/document_fragment'
4
+ require 'nokogiri/html/sax/parser_context'
5
+ require 'nokogiri/html/sax/parser'
6
+ require 'nokogiri/html/element_description'
7
+ require 'nokogiri/html/element_description_defaults'
8
+
9
+ module Nokogiri
10
+ class << self
11
+ ###
12
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
13
+ def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
14
+ Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
15
+ end
16
+ end
17
+
18
+ module HTML
19
+ class << self
20
+ ###
21
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
22
+ def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
23
+ Document.parse(thing, url, encoding, options, &block)
24
+ end
25
+
26
+ ####
27
+ # Parse a fragment from +string+ into a NodeSet.
28
+ def fragment string, encoding = nil
29
+ HTML::DocumentFragment.parse string, encoding
30
+ end
31
+ end
32
+
33
+ # Instance of Nokogiri::HTML::EntityLookup
34
+ NamedCharacters = EntityLookup.new
35
+ end
36
+ end
@@ -0,0 +1,35 @@
1
+ module Nokogiri
2
+ module HTML
3
+ ###
4
+ # Nokogiri HTML builder is used for building HTML documents. It is very
5
+ # similar to the Nokogiri::XML::Builder. In fact, you should go read the
6
+ # documentation for Nokogiri::XML::Builder before reading this
7
+ # documentation.
8
+ #
9
+ # == Synopsis:
10
+ #
11
+ # Create an HTML document with a body that has an onload attribute, and a
12
+ # span tag with a class of "bold" that has content of "Hello world".
13
+ #
14
+ # builder = Nokogiri::HTML::Builder.new do |doc|
15
+ # doc.html {
16
+ # doc.body(:onload => 'some_func();') {
17
+ # doc.span.bold {
18
+ # doc.text "Hello world"
19
+ # }
20
+ # }
21
+ # }
22
+ # end
23
+ # puts builder.to_html
24
+ #
25
+ # The HTML builder inherits from the XML builder, so make sure to read the
26
+ # Nokogiri::XML::Builder documentation.
27
+ class Builder < Nokogiri::XML::Builder
28
+ ###
29
+ # Convert the builder to HTML
30
+ def to_html
31
+ @doc.to_html
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,221 @@
1
+ module Nokogiri
2
+ module HTML
3
+ class Document < Nokogiri::XML::Document
4
+ ###
5
+ # Get the meta tag encoding for this document. If there is no meta tag,
6
+ # then nil is returned.
7
+ def meta_encoding
8
+ meta = meta_content_type and
9
+ /charset\s*=\s*([\w-]+)/i.match(meta['content'])[1]
10
+ end
11
+
12
+ ###
13
+ # Set the meta tag encoding for this document. If there is no meta
14
+ # content tag, the encoding is not set.
15
+ def meta_encoding= encoding
16
+ meta = meta_content_type and
17
+ meta['content'] = "text/html; charset=%s" % encoding
18
+ end
19
+
20
+ def meta_content_type
21
+ css('meta').find { |node|
22
+ node['http-equiv'] =~ /\AContent-Type\z/i
23
+ }
24
+ end
25
+ private :meta_content_type
26
+
27
+ ###
28
+ # Get the title string of this document. Return nil if there is
29
+ # no title tag.
30
+ def title
31
+ title = at('head title') and title.inner_text
32
+ end
33
+
34
+ ###
35
+ # Set the title string of this document. If there is no head
36
+ # element, the title is not set.
37
+ def title=(text)
38
+ unless title = at('head title')
39
+ head = at('head') or return nil
40
+ title = Nokogiri::XML::Node.new('title', self)
41
+ head << title
42
+ end
43
+ title.children = XML::Text.new(text, self)
44
+ end
45
+
46
+ ####
47
+ # Serialize Node using +options+. Save options can also be set using a
48
+ # block. See SaveOptions.
49
+ #
50
+ # These two statements are equivalent:
51
+ #
52
+ # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
53
+ #
54
+ # or
55
+ #
56
+ # node.serialize(:encoding => 'UTF-8') do |config|
57
+ # config.format.as_xml
58
+ # end
59
+ #
60
+ def serialize options = {}
61
+ options[:save_with] ||= XML::Node::SaveOptions::FORMAT |
62
+ XML::Node::SaveOptions::AS_HTML |
63
+ XML::Node::SaveOptions::NO_DECLARATION |
64
+ XML::Node::SaveOptions::NO_EMPTY_TAGS
65
+ super
66
+ end
67
+
68
+ ####
69
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
70
+ def fragment tags = nil
71
+ DocumentFragment.new(self, tags, self.root)
72
+ end
73
+
74
+ class << self
75
+ ###
76
+ # Parse HTML. +thing+ may be a String, or any object that
77
+ # responds to _read_ and _close_ such as an IO, or StringIO.
78
+ # +url+ is the resource where this document is located. +encoding+ is the
79
+ # encoding that should be used when processing the document. +options+
80
+ # is a number that sets options in the parser, such as
81
+ # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
82
+ # Nokogiri::XML::ParseOptions.
83
+ def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
84
+
85
+ options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
86
+ # Give the options to the user
87
+ yield options if block_given?
88
+
89
+ if string_or_io.respond_to?(:encoding)
90
+ unless string_or_io.encoding.name == "ASCII-8BIT"
91
+ encoding ||= string_or_io.encoding.name
92
+ end
93
+ end
94
+
95
+ if string_or_io.respond_to?(:read)
96
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
97
+ if !encoding
98
+ # Perform further encoding detection that libxml2 does
99
+ # not do.
100
+ string_or_io = EncodingReader.new(string_or_io)
101
+ begin
102
+ return read_io(string_or_io, url, encoding, options.to_i)
103
+ rescue EncodingFoundException => e
104
+ # A retry is required because libxml2 has a problem in
105
+ # that it cannot switch encoding well in the middle of
106
+ # parsing, especially if it has already seen a
107
+ # non-ASCII character when it finds an encoding hint.
108
+ encoding = e.encoding
109
+ end
110
+ end
111
+ return read_io(string_or_io, url, encoding, options.to_i)
112
+ end
113
+
114
+ # read_memory pukes on empty docs
115
+ return new if string_or_io.nil? or string_or_io.empty?
116
+
117
+ if !encoding
118
+ encoding = EncodingReader.detect_encoding(string_or_io)
119
+ end
120
+
121
+ read_memory(string_or_io, url, encoding, options.to_i)
122
+ end
123
+ end
124
+
125
+ class EncodingFoundException < Exception # :nodoc:
126
+ attr_reader :encoding
127
+
128
+ def initialize(encoding)
129
+ @encoding = encoding
130
+ super("encoding found: %s" % encoding)
131
+ end
132
+ end
133
+
134
+ class EncodingReader # :nodoc:
135
+ class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
136
+ attr_reader :encoding
137
+
138
+ def found(encoding)
139
+ @encoding = encoding
140
+ throw :found
141
+ end
142
+
143
+ def not_found(encoding)
144
+ found nil
145
+ end
146
+
147
+ def start_element(name, attrs = [])
148
+ case name
149
+ when 'head'
150
+ @head = true
151
+ when 'body'
152
+ not_found
153
+ when 'meta'
154
+ @head or return
155
+ attr = Hash[attrs]
156
+ http_equiv = attr['http-equiv'] and
157
+ http_equiv.match(/\AContent-Type\z/i) and
158
+ content = attr['content'] and
159
+ m = content.match(/;\s*charset\s*=\s*([\w-]+)/) and
160
+ found m[1]
161
+ end
162
+ end
163
+
164
+ def end_element(name)
165
+ if name == 'head'
166
+ not_found
167
+ end
168
+ end
169
+ end
170
+
171
+ def self.detect_encoding(chunk)
172
+ m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
173
+ return Nokogiri.XML(m[1]).encoding
174
+
175
+ handler = SAXHandler.new
176
+ parser = Nokogiri::HTML::SAX::Parser.new(handler)
177
+ catch(:found) {
178
+ parser.parse(chunk)
179
+ }
180
+ handler.encoding
181
+ rescue => e
182
+ nil
183
+ end
184
+
185
+ def initialize(io)
186
+ @io = io
187
+ @firstchunk = nil
188
+ end
189
+
190
+ def read(len)
191
+ # no support for a call without len
192
+
193
+ if !@firstchunk
194
+ @firstchunk = @io.read(len) or return nil
195
+
196
+ # This implementation expects and assumes that the first
197
+ # call from htmlReadIO() is made with a length long enough
198
+ # (~1KB) to achieve further encoding detection that
199
+ # libxml2 does not do.
200
+ if encoding = EncodingReader.detect_encoding(@firstchunk)
201
+ raise EncodingFoundException, encoding
202
+ end
203
+
204
+ # This chunk is stored for the next read in retry.
205
+ return @firstchunk
206
+ end
207
+
208
+ ret = @firstchunk.slice!(0, len)
209
+ if (len -= ret.length) > 0
210
+ rest = @io.read(len) and ret << rest
211
+ end
212
+ if ret.empty?
213
+ nil
214
+ else
215
+ ret
216
+ end
217
+ end
218
+ end
219
+ end
220
+ end
221
+ end