nokogiri 1.3.0-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (256) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +233 -0
  3. data/CHANGELOG.rdoc +222 -0
  4. data/Manifest.txt +247 -0
  5. data/README.ja.rdoc +103 -0
  6. data/README.rdoc +117 -0
  7. data/Rakefile +205 -0
  8. data/bin/nokogiri +47 -0
  9. data/ext/nokogiri/extconf.rb +89 -0
  10. data/ext/nokogiri/html_document.c +183 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +30 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser.c +57 -0
  17. data/ext/nokogiri/html_sax_parser.h +11 -0
  18. data/ext/nokogiri/iconv.dll +0 -0
  19. data/ext/nokogiri/libexslt.dll +0 -0
  20. data/ext/nokogiri/libxml2.dll +0 -0
  21. data/ext/nokogiri/libxslt.dll +0 -0
  22. data/ext/nokogiri/nokogiri.c +81 -0
  23. data/ext/nokogiri/nokogiri.h +149 -0
  24. data/ext/nokogiri/xml_attr.c +92 -0
  25. data/ext/nokogiri/xml_attr.h +9 -0
  26. data/ext/nokogiri/xml_cdata.c +53 -0
  27. data/ext/nokogiri/xml_cdata.h +9 -0
  28. data/ext/nokogiri/xml_comment.c +51 -0
  29. data/ext/nokogiri/xml_comment.h +9 -0
  30. data/ext/nokogiri/xml_document.c +308 -0
  31. data/ext/nokogiri/xml_document.h +21 -0
  32. data/ext/nokogiri/xml_document_fragment.c +48 -0
  33. data/ext/nokogiri/xml_document_fragment.h +10 -0
  34. data/ext/nokogiri/xml_dtd.c +102 -0
  35. data/ext/nokogiri/xml_dtd.h +8 -0
  36. data/ext/nokogiri/xml_entity_reference.c +50 -0
  37. data/ext/nokogiri/xml_entity_reference.h +9 -0
  38. data/ext/nokogiri/xml_io.c +24 -0
  39. data/ext/nokogiri/xml_io.h +10 -0
  40. data/ext/nokogiri/xml_namespace.c +69 -0
  41. data/ext/nokogiri/xml_namespace.h +12 -0
  42. data/ext/nokogiri/xml_node.c +928 -0
  43. data/ext/nokogiri/xml_node.h +14 -0
  44. data/ext/nokogiri/xml_node_set.c +386 -0
  45. data/ext/nokogiri/xml_node_set.h +9 -0
  46. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  47. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  48. data/ext/nokogiri/xml_reader.c +572 -0
  49. data/ext/nokogiri/xml_reader.h +10 -0
  50. data/ext/nokogiri/xml_relax_ng.c +106 -0
  51. data/ext/nokogiri/xml_relax_ng.h +9 -0
  52. data/ext/nokogiri/xml_sax_parser.c +336 -0
  53. data/ext/nokogiri/xml_sax_parser.h +10 -0
  54. data/ext/nokogiri/xml_sax_push_parser.c +86 -0
  55. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  56. data/ext/nokogiri/xml_schema.c +107 -0
  57. data/ext/nokogiri/xml_schema.h +9 -0
  58. data/ext/nokogiri/xml_syntax_error.c +203 -0
  59. data/ext/nokogiri/xml_syntax_error.h +12 -0
  60. data/ext/nokogiri/xml_text.c +47 -0
  61. data/ext/nokogiri/xml_text.h +9 -0
  62. data/ext/nokogiri/xml_xpath.c +53 -0
  63. data/ext/nokogiri/xml_xpath.h +11 -0
  64. data/ext/nokogiri/xml_xpath_context.c +252 -0
  65. data/ext/nokogiri/xml_xpath_context.h +9 -0
  66. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  67. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  68. data/ext/nokogiri/zlib1.dll +0 -0
  69. data/lib/action-nokogiri.rb +36 -0
  70. data/lib/nokogiri.rb +110 -0
  71. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  72. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  73. data/lib/nokogiri/css.rb +25 -0
  74. data/lib/nokogiri/css/generated_parser.rb +748 -0
  75. data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
  76. data/lib/nokogiri/css/node.rb +107 -0
  77. data/lib/nokogiri/css/parser.rb +82 -0
  78. data/lib/nokogiri/css/parser.y +227 -0
  79. data/lib/nokogiri/css/syntax_error.rb +7 -0
  80. data/lib/nokogiri/css/tokenizer.rb +11 -0
  81. data/lib/nokogiri/css/tokenizer.rex +54 -0
  82. data/lib/nokogiri/css/xpath_visitor.rb +172 -0
  83. data/lib/nokogiri/decorators.rb +2 -0
  84. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  85. data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
  86. data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
  87. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
  88. data/lib/nokogiri/decorators/slop.rb +33 -0
  89. data/lib/nokogiri/ffi/html/document.rb +37 -0
  90. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  91. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  92. data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
  93. data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
  94. data/lib/nokogiri/ffi/libxml.rb +314 -0
  95. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  96. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  97. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  98. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  100. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  102. data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
  103. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  104. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  105. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  106. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  107. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  108. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  109. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
  110. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  111. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  112. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  113. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  114. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  115. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  116. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  117. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  118. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  119. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  120. data/lib/nokogiri/ffi/xml/document.rb +107 -0
  121. data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
  122. data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
  123. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  124. data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
  125. data/lib/nokogiri/ffi/xml/node.rb +380 -0
  126. data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
  127. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  128. data/lib/nokogiri/ffi/xml/reader.rb +217 -0
  129. data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
  130. data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
  131. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
  132. data/lib/nokogiri/ffi/xml/schema.rb +55 -0
  133. data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
  134. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  135. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  136. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  137. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  138. data/lib/nokogiri/hpricot.rb +62 -0
  139. data/lib/nokogiri/html.rb +34 -0
  140. data/lib/nokogiri/html/builder.rb +35 -0
  141. data/lib/nokogiri/html/document.rb +71 -0
  142. data/lib/nokogiri/html/document_fragment.rb +15 -0
  143. data/lib/nokogiri/html/element_description.rb +23 -0
  144. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  145. data/lib/nokogiri/html/sax/parser.rb +47 -0
  146. data/lib/nokogiri/nokogiri.rb +1 -0
  147. data/lib/nokogiri/syntax_error.rb +4 -0
  148. data/lib/nokogiri/version.rb +29 -0
  149. data/lib/nokogiri/version_warning.rb +11 -0
  150. data/lib/nokogiri/xml.rb +62 -0
  151. data/lib/nokogiri/xml/attr.rb +9 -0
  152. data/lib/nokogiri/xml/builder.rb +254 -0
  153. data/lib/nokogiri/xml/cdata.rb +11 -0
  154. data/lib/nokogiri/xml/document.rb +100 -0
  155. data/lib/nokogiri/xml/document_fragment.rb +49 -0
  156. data/lib/nokogiri/xml/dtd.rb +11 -0
  157. data/lib/nokogiri/xml/entity_declaration.rb +11 -0
  158. data/lib/nokogiri/xml/fragment_handler.rb +55 -0
  159. data/lib/nokogiri/xml/namespace.rb +7 -0
  160. data/lib/nokogiri/xml/node.rb +745 -0
  161. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  162. data/lib/nokogiri/xml/node_set.rb +238 -0
  163. data/lib/nokogiri/xml/notation.rb +6 -0
  164. data/lib/nokogiri/xml/parse_options.rb +80 -0
  165. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  166. data/lib/nokogiri/xml/reader.rb +66 -0
  167. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  168. data/lib/nokogiri/xml/sax.rb +3 -0
  169. data/lib/nokogiri/xml/sax/document.rb +143 -0
  170. data/lib/nokogiri/xml/sax/parser.rb +101 -0
  171. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  172. data/lib/nokogiri/xml/schema.rb +65 -0
  173. data/lib/nokogiri/xml/syntax_error.rb +34 -0
  174. data/lib/nokogiri/xml/xpath.rb +10 -0
  175. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  176. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  177. data/lib/nokogiri/xslt.rb +48 -0
  178. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  179. data/lib/xsd/xmlparser/nokogiri.rb +64 -0
  180. data/tasks/test.rb +161 -0
  181. data/test/css/test_nthiness.rb +160 -0
  182. data/test/css/test_parser.rb +277 -0
  183. data/test/css/test_tokenizer.rb +176 -0
  184. data/test/css/test_xpath_visitor.rb +76 -0
  185. data/test/ffi/test_document.rb +35 -0
  186. data/test/files/address_book.rlx +12 -0
  187. data/test/files/address_book.xml +10 -0
  188. data/test/files/dont_hurt_em_why.xml +422 -0
  189. data/test/files/exslt.xml +8 -0
  190. data/test/files/exslt.xslt +35 -0
  191. data/test/files/po.xml +32 -0
  192. data/test/files/po.xsd +66 -0
  193. data/test/files/staff.xml +59 -0
  194. data/test/files/staff.xslt +32 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/helper.rb +123 -0
  197. data/test/hpricot/files/basic.xhtml +17 -0
  198. data/test/hpricot/files/boingboing.html +2266 -0
  199. data/test/hpricot/files/cy0.html +3653 -0
  200. data/test/hpricot/files/immob.html +400 -0
  201. data/test/hpricot/files/pace_application.html +1320 -0
  202. data/test/hpricot/files/tenderlove.html +16 -0
  203. data/test/hpricot/files/uswebgen.html +220 -0
  204. data/test/hpricot/files/utf8.html +1054 -0
  205. data/test/hpricot/files/week9.html +1723 -0
  206. data/test/hpricot/files/why.xml +19 -0
  207. data/test/hpricot/load_files.rb +11 -0
  208. data/test/hpricot/test_alter.rb +68 -0
  209. data/test/hpricot/test_builder.rb +20 -0
  210. data/test/hpricot/test_parser.rb +426 -0
  211. data/test/hpricot/test_paths.rb +15 -0
  212. data/test/hpricot/test_preserved.rb +77 -0
  213. data/test/hpricot/test_xml.rb +30 -0
  214. data/test/html/sax/test_parser.rb +52 -0
  215. data/test/html/test_builder.rb +156 -0
  216. data/test/html/test_document.rb +361 -0
  217. data/test/html/test_document_encoding.rb +46 -0
  218. data/test/html/test_document_fragment.rb +97 -0
  219. data/test/html/test_element_description.rb +95 -0
  220. data/test/html/test_named_characters.rb +14 -0
  221. data/test/html/test_node.rb +165 -0
  222. data/test/test_convert_xpath.rb +186 -0
  223. data/test/test_css_cache.rb +56 -0
  224. data/test/test_gc.rb +15 -0
  225. data/test/test_memory_leak.rb +77 -0
  226. data/test/test_nokogiri.rb +127 -0
  227. data/test/test_reader.rb +316 -0
  228. data/test/test_xslt_transforms.rb +131 -0
  229. data/test/xml/node/test_save_options.rb +20 -0
  230. data/test/xml/node/test_subclass.rb +44 -0
  231. data/test/xml/sax/test_parser.rb +169 -0
  232. data/test/xml/sax/test_push_parser.rb +92 -0
  233. data/test/xml/test_attr.rb +38 -0
  234. data/test/xml/test_builder.rb +73 -0
  235. data/test/xml/test_cdata.rb +38 -0
  236. data/test/xml/test_comment.rb +23 -0
  237. data/test/xml/test_document.rb +397 -0
  238. data/test/xml/test_document_encoding.rb +26 -0
  239. data/test/xml/test_document_fragment.rb +76 -0
  240. data/test/xml/test_dtd.rb +42 -0
  241. data/test/xml/test_dtd_encoding.rb +31 -0
  242. data/test/xml/test_entity_reference.rb +21 -0
  243. data/test/xml/test_namespace.rb +43 -0
  244. data/test/xml/test_node.rb +808 -0
  245. data/test/xml/test_node_attributes.rb +34 -0
  246. data/test/xml/test_node_encoding.rb +84 -0
  247. data/test/xml/test_node_set.rb +368 -0
  248. data/test/xml/test_parse_options.rb +52 -0
  249. data/test/xml/test_processing_instruction.rb +30 -0
  250. data/test/xml/test_reader_encoding.rb +126 -0
  251. data/test/xml/test_relax_ng.rb +60 -0
  252. data/test/xml/test_schema.rb +65 -0
  253. data/test/xml/test_text.rb +18 -0
  254. data/test/xml/test_unparented_node.rb +381 -0
  255. data/test/xml/test_xpath.rb +106 -0
  256. metadata +409 -0
@@ -0,0 +1,7 @@
1
+ require 'nokogiri/syntax_error'
module Nokogiri
  module CSS
    ###
    # CSS-namespaced subclass of ::Nokogiri::SyntaxError.  It adds no
    # behavior of its own; it only gives callers a narrower class to rescue.
    class SyntaxError < ::Nokogiri::SyntaxError; end
  end
end
@@ -0,0 +1,11 @@
module Nokogiri
  module CSS
    ###
    # Thin public wrapper around GeneratedTokenizer.
    class Tokenizer < GeneratedTokenizer
      ###
      # Tokenize +str+ by handing it straight to +scan_evaluate+
      # (presumably provided by GeneratedTokenizer -- not visible here) and
      # return whatever that call returns.
      def scan(str)
        result = scan_evaluate(str)
        result
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class GeneratedTokenizer < GeneratedParser
4
+
5
+ macro
6
+ nl \n|\r\n|\r|\f
7
+ w [\s\r\n\f]*
8
+ nonascii [^\\\\0-\\\\177]
9
+ num -?([0-9]+|[0-9]*\.[0-9]+)
10
+ unicode \\\\\\\\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?
11
+
12
+ escape {unicode}|\\\\\\\[^\n\r\f0-9A-Fa-f]
13
+ nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
14
+ nmstart [_A-Za-z]|{nonascii}|{escape}
15
+ ident [-@]?({nmstart})({nmchar})*
16
+ name ({nmchar})+
17
+ string1 "([^\n\r\f"]|\\{nl}|{nonascii}|{escape})*"
18
+ string2 '([^\n\r\f']|\\{nl}|{nonascii}|{escape})*'
19
+ string {string1}|{string2}
20
+
21
+ rule
22
+
23
+ # [:state] pattern [actions]
24
+
25
+ {ident}\(\s* { [:FUNCTION, text] }
26
+ {ident} { [:IDENT, text] }
27
+ \#{name} { [:HASH, text] }
28
+ {w}~={w} { [:INCLUDES, text] }
29
+ {w}\|={w} { [:DASHMATCH, text] }
30
+ {w}\^={w} { [:PREFIXMATCH, text] }
31
+ {w}\$={w} { [:SUFFIXMATCH, text] }
32
+ {w}\*={w} { [:SUBSTRINGMATCH, text] }
33
+ {w}!={w} { [:NOT_EQUAL, text] }
34
+ {w}={w} { [:EQUAL, text] }
35
+ {w}\) { [:RPAREN, text] }
36
+ {w}\[{w} { [:LSQUARE, text] }
37
+ {w}\] { [:RSQUARE, text] }
38
+ {w}\+{w} { [:PLUS, text] }
39
+ {w}>{w} { [:GREATER, text] }
40
+ {w},{w} { [:COMMA, text] }
41
+ {w}~{w} { [:TILDE, text] }
42
+ \:not\({w} { [:NOT, text] }
43
+ {num} { [:NUMBER, text] }
44
+ {w}\/\/{w} { [:DOUBLESLASH, text] }
45
+ {w}\/{w} { [:SLASH, text] }
46
+
47
+ U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
48
+
49
+ [\s\t\r\n\f]+ { [:S, text] }
50
+ {string} { [:STRING, text] }
51
+ . { [text, text] }
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,172 @@
module Nokogiri
  module CSS
    ###
    # Walks a parsed CSS selector tree and returns the equivalent XPath
    # fragment as a String.  Every +visit_*+ method takes one tree node and
    # reads its +value+ array (and, where needed, its +type+).
    class XPathVisitor # :nodoc:
      ###
      # Translate a function node (its first value is the function name
      # including the opening parenthesis, e.g. "nth-child(").
      #
      # If the visitor responds to <tt>visit_function_NAME</tt> that method
      # wins; otherwise the built-in table below is used, and unknown
      # functions are passed through with "." as their first argument.
      def visit_function(node)
        # note that nth-child and nth-last-child are preprocessed in css/node.rb.
        msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
        return send(msg, node) if respond_to?(msg)

        case node.value.first
        when /^text\(/
          'child::text()'
        when /^self\(/
          "self::#{node.value[1]}"
        when /^(eq|nth|nth-of-type|nth-child)\(/
          if node.value[1].is_a?(Nokogiri::CSS::Node) && node.value[1].type == :AN_PLUS_B
            an_plus_b(node.value[1])
          else
            "position() = " + node.value[1]
          end
        when /^(first|first-of-type)\(/
          "position() = 1"
        when /^(last|last-of-type)\(/
          "position() = last()"
        when /^(nth-last-child|nth-last-of-type)\(/
          # NOTE(review): an argument of 1 yields "last() - 1", i.e. the
          # second-to-last position -- confirm this offset is intended.
          "position() = last() - #{node.value[1]}"
        when /^contains\(/
          "contains(., #{node.value[1]})"
        when /^gt\(/
          "position() > #{node.value[1]}"
        when /^only-child\(/
          "last() = 1"
        when /^comment\(/
          "comment()"
        else
          args = ['.'] + node.value[1..-1]
          "#{node.value.first}#{args.join(', ')})"
        end
      end

      ###
      # Wrap the visited child in an XPath <tt>not(...)</tt>.
      def visit_not(node)
        'not(' + node.value.first.accept(self) + ')'
      end

      ###
      # "A ~ B": B restricted to nodes having a preceding sibling A.
      def visit_preceding_selector(node)
        node.value.last.accept(self) +
          '[preceding-sibling::' +
          node.value.first.accept(self) +
          ']'
      end

      ###
      # "A + B": the first following sibling of A, if it is a B.
      def visit_direct_adjacent_selector(node)
        node.value.first.accept(self) +
          "/following-sibling::*[1]/self::" +
          node.value.last.accept(self)
      end

      ###
      # "#foo" becomes an @id comparison.  A value that does not start with
      # "#" produces an empty id, exactly as before.
      def visit_id(node)
        id = node.value.first[/^#(.*)$/, 1]
        "@id = '#{id}'"
      end

      ###
      # Translate an attribute condition.  With a single value only the
      # attribute reference is returned; with three values
      # (attribute, operator, operand) a comparison is built.
      def visit_attribute_condition(node)
        # Functions and explicit axes ("child::x") must not get an "@" prefix.
        attribute = if (node.value.first.type == :FUNCTION) || (node.value.first.value.first =~ /::/)
                      ''
                    else
                      '@'
                    end
        attribute += node.value.first.accept(self)

        # Support non-standard css
        attribute.gsub!(/^@@/, '@')

        return attribute unless node.value.length == 3

        # Quote the operand unless the selector already quoted it.
        value = node.value.last
        value = "'#{value}'" if value !~ /^['"]/

        case node.value[1]
        when :equal
          attribute + " = " + "#{value}"
        when :not_equal
          attribute + " != " + "#{value}"
        when :substring_match
          "contains(#{attribute}, #{value})"
        when :prefix_match
          "starts-with(#{attribute}, #{value})"
        when :dash_match
          "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
        when :includes
          "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
        when :suffix_match
          "substring(#{attribute}, string-length(#{attribute}) - " +
            "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
        else
          attribute + " #{node.value[1]} " + "#{value}"
        end
      end

      ###
      # Translate a pseudo class.  Function-typed children are delegated to
      # their own visit method; a <tt>visit_pseudo_class_NAME</tt> method on
      # the visitor wins over the built-in table; unknown names are emitted
      # as an XPath function call on ".".
      def visit_pseudo_class(node)
        if node.value.first.is_a?(Nokogiri::CSS::Node) && node.value.first.type == :FUNCTION
          node.value.first.accept(self)
        else
          msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, '')}"
          return send(msg, node) if respond_to?(msg)

          case node.value.first
          when "first" then "position() = 1"
          when "last" then "position() = last()"
          when "first-of-type" then "position() = 1"
          when "last-of-type" then "position() = last()"
          when "only-of-type" then "last() = 1"
          when "empty" then "not(node())"
          when "parent" then "node()"
          when "root" then "not(parent::*)"
          else
            node.value.first + "(.)"
          end
        end
      end

      ###
      # ".foo" becomes a whitespace-delimited match against @class.
      def visit_class_condition(node)
        "contains(concat(' ', @class, ' '), ' #{node.value.first} ')"
      end

      ###
      # Join two conditions with XPath "and".
      def visit_combinator(node)
        node.value.first.accept(self) + ' and ' +
          node.value.last.accept(self)
      end

      ###
      # selector[condition]
      def visit_conditional_selector(node)
        node.value.first.accept(self) + '[' +
          node.value.last.accept(self) + ']'
      end

      ###
      # "A B" becomes A//B.
      def visit_descendant_selector(node)
        node.value.first.accept(self) +
          '//' +
          node.value.last.accept(self)
      end

      ###
      # "A > B" becomes A/B.
      def visit_child_selector(node)
        node.value.first.accept(self) +
          '/' +
          node.value.last.accept(self)
      end

      ###
      # Element names are emitted verbatim.
      def visit_element_name(node)
        node.value.first
      end

      ###
      # Entry point: ask +node+ to dispatch back into this visitor.
      def accept(node)
        node.accept(self)
      end

      private

      ###
      # Build the position() test for an "an+b" node whose value is the four
      # tokens [a, 'n', '+', b].  Raises ArgumentError for any other size.
      def an_plus_b(node)
        raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4

        a = node.value[0].to_i
        b = node.value[3].to_i

        # With a == 0 the general forms below would emit "mod 0", which is
        # NaN in XPath and therefore never true; 0n+b simply means position b.
        return "position() = #{b}" if a == 0

        if b == 0
          "(position() mod #{a}) = 0"
        else
          compare = a < 0 ? "<=" : ">="
          "(position() #{compare} #{b}) and (((position()-#{b}) mod #{a.abs}) = 0)"
        end
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'nokogiri/decorators/hpricot'
2
+ require 'nokogiri/decorators/slop'
@@ -0,0 +1,3 @@
1
+ require 'nokogiri/decorators/hpricot/node'
2
+ require 'nokogiri/decorators/hpricot/node_set'
3
+ require 'nokogiri/decorators/hpricot/xpath_visitor'
@@ -0,0 +1,56 @@
module Nokogiri
  module Decorators
    module Hpricot
      ###
      # Hpricot-compatibility mixin for nodes.  It converts Hpricot-style
      # rules to XPath before delegating to the decorated object's own
      # +search+ / +xpath+ (reached through +super+).
      module Node # :nodoc:
        ###
        # Search with any number of rules.  A trailing Hash is treated as
        # the namespace map and passed through as the last argument.
        def search(*paths)
          ns = paths.last.is_a?(Hash) ? paths.pop : {}
          converted = paths.map { |path| convert_to_xpath(path) }.flatten.uniq

          super(*converted + [ns])
        end

        # Hpricot's "doc / 'rule'" shorthand for #search.
        def /(path); search(path) end

        ###
        # With arguments this is the regular xpath search (via +super+);
        # without arguments it returns +path+, as Hpricot's Node#xpath did.
        def xpath(*args)
          return super if args.length > 0
          path
        end

        # Hpricot compatibility: the node stands in for its raw attributes.
        def raw_attributes; self end

        ###
        # First node anywhere in the document whose id is +element_id+.
        def get_element_by_id(element_id)
          search("//*[@id='#{element_id}']").first
        end

        ###
        # All nodes anywhere in the document named +tag+.
        def get_elements_by_tag_name(tag)
          search("//#{tag}")
        end

        ###
        # Convert +rule+ into an Array of XPath strings.
        #
        # Rules that already look like XPath ("//x", "/x", ".//x") only get
        # namespace handling; "//x" is additionally anchored at the current
        # node.  Everything else is parsed as CSS with the Hpricot visitor
        # extensions.
        def convert_to_xpath(rule)
          rule = rule.to_s
          case rule
          when %r{^//}
            [".#{Hpricot::XPathVisitor.xpath_namespace_helper(rule)}"]
          when %r{^/}
            [Hpricot::XPathVisitor.xpath_namespace_helper(rule)]
          when %r{^\.//}
            # The dot must be literal: an unescaped "." also matched rules
            # such as "a//b" and sent them down the raw-XPath path, unlike
            # longer tag names ("div//b") which are parsed as CSS.
            [Hpricot::XPathVisitor.xpath_namespace_helper(rule)]
          else
            visitor = CSS::XPathVisitor.new
            visitor.extend(Hpricot::XPathVisitor)
            CSS.xpath_for(rule, :prefix => ".//", :visitor => visitor)
          end
        end

        # Hpricot compatibility alias for +name+.
        def target
          name
        end

        # Hpricot compatibility alias for +to_html+.
        def to_original_html
          to_html
        end
      end
    end
  end
end
@@ -0,0 +1,54 @@
module Nokogiri
  module Decorators
    module Hpricot
      module NodeSet

        # Keep the nodes of this set that match +rule+.
        # Positional rules (like <tt>:nth()</tt>) aren't currently supported.
        #
        # A node is kept when <tt>node.at</tt> finds something for the
        # rule's XPath prefixed with ".//self::".
        #
        # example:
        #   node_set.filter('.ohmy')           # class "ohmy"
        #   node_set.filter('a#link2')         # <a id='link2'>
        #   node_set.filter('a[@id="link2"]')  # <a id='link2'>
        def filter(rule)
          keep_when_matched = lambda { |matched| matched }
          filter_transformer(keep_when_matched, rule)
        end

        # Inverse of #filter: keep the nodes that do <em>not</em> match
        # +rule+.  Positional rules (like <tt>:nth()</tt>) aren't currently
        # supported.
        #
        # +rule+ may also be an XML::Node, in which case that node is
        # dropped from the result (if it is present):
        #   node_set.not(node_to_exclude)  # every node EXCEPT node_to_exclude
        #
        def not(rule)
          keep_when_unmatched = lambda { |matched| !matched }
          filter_transformer(keep_when_unmatched, rule)
        end

        # Shared implementation of #filter and #not.  +transformer+ receives
        # the raw match result for each node and decides whether that node
        # goes into the returned (decorated) node set.
        def filter_transformer(transformer, rule) # :nodoc:
          result = XML::NodeSet.new(document)
          document.decorate(result)

          if rule.is_a?(XML::Node)
            # Node given: compare by equality instead of running a query.
            each do |candidate|
              result << candidate if transformer.call(candidate == rule)
            end
          else
            parsed = CSS.parse(rule.to_s)
            visitor = CSS::XPathVisitor.new
            visitor.extend(Hpricot::XPathVisitor)
            each do |candidate|
              hit = candidate.at(".//self::" + visitor.accept(parsed.first))
              result << candidate if transformer.call(hit)
            end
          end

          result
        end
        private :filter_transformer
      end
    end
  end
end
@@ -0,0 +1,30 @@
module Nokogiri
  module Decorators
    module Hpricot
      ####
      # This mixin does custom adjustments to deal with _whyML
      module XPathVisitor
        ###
        # Visit attribute condition nodes with +node+.
        #
        # Unless the condition's subject is a function or starts with "@",
        # it is treated as a child element ("child::name").  The result of
        # the regular visitor (+super+) then has bare child::text() wrapped
        # in normalize-space().
        #
        # The "child::" prefix is applied only for the duration of the call
        # and then removed again.  Leaving it on the tree made a second
        # visit of the same node produce "child::child::name".
        def visit_attribute_condition(node)
          subject = node.value.first
          needs_prefix = !((subject.type == :FUNCTION) || (subject.value.first =~ /^@/))
          original = subject.value[0]

          subject.value[0] = "child::" + original if needs_prefix
          begin
            super(node).gsub(/child::text\(\)/, 'normalize-space(child::text())')
          ensure
            subject.value[0] = original if needs_prefix
          end
        end

        ###
        # Take a path like '//t:sam' and convert it to the XPath
        # "//*[name()='t:sam']".  Each "/"-separated step of the form
        # prefix:name is rewritten; anything following the name in that
        # step (e.g. a predicate) is kept, and other steps are untouched.
        def self.xpath_namespace_helper(rule)
          rule.split(/\//).map do |tag|
            if match = tag.match(/^(\w+:\w+)(.*)/)
              "*[name()='#{match[1]}']#{match[2]}"
            else
              tag
            end
          end.join("/")
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
module Nokogiri
  module Decorators
    ###
    # The Slop decorator implements method_missing such that methods may be
    # used instead of XPath or CSS.  See Nokogiri.Slop
    module Slop
      ###
      # Look for child nodes called +name+.  See Nokogiri.Slop
      #
      # * no arguments: XPath "./name"
      # * Hash with :css: CSS query "name" + the given suffix
      # * Hash with :xpath: XPath "./name[...]", conditions joined by "and"
      # * anything else: treated as a CSS suffix, converted to XPath with
      #   the CSS parser cache disabled
      #
      # Returns the single node when exactly one matches, otherwise the
      # whole list.  When nothing matches, or the Hash carries neither :css
      # nor :xpath, the call falls through to +super+ so the caller gets the
      # usual NoMethodError for +name+.
      def method_missing(name, *args, &block)
        list = nil

        if args.empty?
          list = xpath("./#{name}")
        elsif args.first.is_a? Hash
          hash = args.first
          if hash[:css]
            list = css("#{name}#{hash[:css]}")
          elsif hash[:xpath]
            conds = Array(hash[:xpath]).join(' and ')
            list = xpath("./#{name}[#{conds}]")
          end
        else
          CSS::Parser.without_cache do
            list = xpath(
              *CSS.xpath_for("#{name}#{args.first}", :prefix => "./")
            )
          end
        end

        # list stays nil for an options Hash without :css / :xpath; calling
        # empty? on it would raise a misleading NoMethodError about nil.
        super if list.nil? || list.empty?
        list.length == 1 ? list.first : list
      end
    end
  end
end
@@ -0,0 +1,37 @@
module Nokogiri
  module HTML
    ###
    # FFI-backed HTML document.  The methods here hand their arguments to
    # the matching LibXML.html* binding; +wrap+ and
    # +wrap_with_error_handling+ are not defined in this file (presumably
    # inherited from XML::Document).
    class Document < XML::Document

      attr_accessor :cstruct # :nodoc:

      # Create an empty document.  args[0] is passed to htmlNewDoc as the
      # URI and args[1] as the external id; all +args+ are then forwarded
      # to +initialize+.
      def self.new(*args) # :nodoc:
        uri         = args[0]
        external_id = args[1]
        doc = wrap(LibXML.htmlNewDoc(uri, external_id))
        doc.send :initialize, *args
        doc
      end

      # Parse HTML read from +io+ through an IoCallbacks reader.
      def self.read_io(io, url, encoding, options) # :nodoc:
        wrap_with_error_handling do
          LibXML.htmlReadIO(IoCallbacks.reader(io), nil, nil, url, encoding, options)
        end
      end

      # Parse HTML held in +string+.
      def self.read_memory(string, url, encoding, options) # :nodoc:
        # libxml2 expects the buffer size in bytes.  String#length counts
        # characters on Ruby 1.9, which would cut multibyte input short;
        # fall back to length where bytesize does not exist (Ruby 1.8.6).
        size = string.respond_to?(:bytesize) ? string.bytesize : string.length
        wrap_with_error_handling do
          LibXML.htmlReadMemory(string, size, url, encoding, options)
        end
      end

      # Set the document's meta encoding and return +encoding+.
      def meta_encoding=(encoding) # :nodoc:
        LibXML.htmlSetMetaEncoding(cstruct, encoding)
        encoding
      end

      # Return whatever htmlGetMetaEncoding reports for this document.
      def meta_encoding # :nodoc:
        LibXML.htmlGetMetaEncoding(cstruct)
      end
    end
  end
end