superfeedr-nokogiri 1.4.0.20091116183308

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +330 -0
  3. data/CHANGELOG.rdoc +314 -0
  4. data/Manifest.txt +269 -0
  5. data/README.ja.rdoc +105 -0
  6. data/README.rdoc +118 -0
  7. data/Rakefile +244 -0
  8. data/bin/nokogiri +49 -0
  9. data/ext/nokogiri/extconf.rb +145 -0
  10. data/ext/nokogiri/html_document.c +145 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +32 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  17. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  18. data/ext/nokogiri/nokogiri.c +89 -0
  19. data/ext/nokogiri/nokogiri.h +145 -0
  20. data/ext/nokogiri/xml_attr.c +92 -0
  21. data/ext/nokogiri/xml_attr.h +9 -0
  22. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  23. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  24. data/ext/nokogiri/xml_cdata.c +54 -0
  25. data/ext/nokogiri/xml_cdata.h +9 -0
  26. data/ext/nokogiri/xml_comment.c +52 -0
  27. data/ext/nokogiri/xml_comment.h +9 -0
  28. data/ext/nokogiri/xml_document.c +388 -0
  29. data/ext/nokogiri/xml_document.h +24 -0
  30. data/ext/nokogiri/xml_document_fragment.c +46 -0
  31. data/ext/nokogiri/xml_document_fragment.h +10 -0
  32. data/ext/nokogiri/xml_dtd.c +192 -0
  33. data/ext/nokogiri/xml_dtd.h +10 -0
  34. data/ext/nokogiri/xml_element_content.c +123 -0
  35. data/ext/nokogiri/xml_element_content.h +10 -0
  36. data/ext/nokogiri/xml_element_decl.c +69 -0
  37. data/ext/nokogiri/xml_element_decl.h +9 -0
  38. data/ext/nokogiri/xml_entity_decl.c +97 -0
  39. data/ext/nokogiri/xml_entity_decl.h +10 -0
  40. data/ext/nokogiri/xml_entity_reference.c +50 -0
  41. data/ext/nokogiri/xml_entity_reference.h +9 -0
  42. data/ext/nokogiri/xml_io.c +31 -0
  43. data/ext/nokogiri/xml_io.h +11 -0
  44. data/ext/nokogiri/xml_namespace.c +74 -0
  45. data/ext/nokogiri/xml_namespace.h +12 -0
  46. data/ext/nokogiri/xml_node.c +1060 -0
  47. data/ext/nokogiri/xml_node.h +13 -0
  48. data/ext/nokogiri/xml_node_set.c +397 -0
  49. data/ext/nokogiri/xml_node_set.h +9 -0
  50. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  51. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  52. data/ext/nokogiri/xml_reader.c +593 -0
  53. data/ext/nokogiri/xml_reader.h +10 -0
  54. data/ext/nokogiri/xml_relax_ng.c +159 -0
  55. data/ext/nokogiri/xml_relax_ng.h +9 -0
  56. data/ext/nokogiri/xml_sax_parser.c +286 -0
  57. data/ext/nokogiri/xml_sax_parser.h +43 -0
  58. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  59. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  60. data/ext/nokogiri/xml_sax_push_parser.c +114 -0
  61. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  62. data/ext/nokogiri/xml_schema.c +156 -0
  63. data/ext/nokogiri/xml_schema.h +9 -0
  64. data/ext/nokogiri/xml_syntax_error.c +261 -0
  65. data/ext/nokogiri/xml_syntax_error.h +13 -0
  66. data/ext/nokogiri/xml_text.c +48 -0
  67. data/ext/nokogiri/xml_text.h +9 -0
  68. data/ext/nokogiri/xml_xpath.c +53 -0
  69. data/ext/nokogiri/xml_xpath.h +11 -0
  70. data/ext/nokogiri/xml_xpath_context.c +239 -0
  71. data/ext/nokogiri/xml_xpath_context.h +9 -0
  72. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  73. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  74. data/lib/nokogiri.rb +116 -0
  75. data/lib/nokogiri/css.rb +25 -0
  76. data/lib/nokogiri/css/generated_parser.rb +646 -0
  77. data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
  78. data/lib/nokogiri/css/node.rb +99 -0
  79. data/lib/nokogiri/css/parser.rb +82 -0
  80. data/lib/nokogiri/css/parser.y +227 -0
  81. data/lib/nokogiri/css/syntax_error.rb +7 -0
  82. data/lib/nokogiri/css/tokenizer.rb +7 -0
  83. data/lib/nokogiri/css/tokenizer.rex +54 -0
  84. data/lib/nokogiri/css/xpath_visitor.rb +162 -0
  85. data/lib/nokogiri/decorators/slop.rb +33 -0
  86. data/lib/nokogiri/ffi/html/document.rb +28 -0
  87. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  88. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  89. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  90. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  91. data/lib/nokogiri/ffi/libxml.rb +356 -0
  92. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  93. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  94. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  95. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  96. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  97. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  98. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  100. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  101. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  102. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  103. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  104. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  105. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  106. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  107. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  108. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  109. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  110. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  111. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  112. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
  113. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  114. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  115. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  116. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  117. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  118. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  119. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  120. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  121. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  122. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  123. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  124. data/lib/nokogiri/ffi/xml/document.rb +135 -0
  125. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  126. data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
  127. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  128. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  129. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  132. data/lib/nokogiri/ffi/xml/node.rb +444 -0
  133. data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
  134. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  135. data/lib/nokogiri/ffi/xml/reader.rb +227 -0
  136. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  137. data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
  138. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  139. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
  140. data/lib/nokogiri/ffi/xml/schema.rb +92 -0
  141. data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
  142. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  143. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  144. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  145. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  146. data/lib/nokogiri/html.rb +35 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +88 -0
  149. data/lib/nokogiri/html/document_fragment.rb +15 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  152. data/lib/nokogiri/html/sax/parser.rb +48 -0
  153. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  154. data/lib/nokogiri/syntax_error.rb +4 -0
  155. data/lib/nokogiri/version.rb +33 -0
  156. data/lib/nokogiri/version_warning.rb +11 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +405 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +131 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +69 -0
  165. data/lib/nokogiri/xml/dtd.rb +11 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  169. data/lib/nokogiri/xml/fragment_handler.rb +71 -0
  170. data/lib/nokogiri/xml/namespace.rb +13 -0
  171. data/lib/nokogiri/xml/node.rb +665 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  173. data/lib/nokogiri/xml/node_set.rb +307 -0
  174. data/lib/nokogiri/xml/notation.rb +6 -0
  175. data/lib/nokogiri/xml/parse_options.rb +85 -0
  176. data/lib/nokogiri/xml/pp.rb +2 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  178. data/lib/nokogiri/xml/pp/node.rb +56 -0
  179. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  180. data/lib/nokogiri/xml/reader.rb +74 -0
  181. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  182. data/lib/nokogiri/xml/sax.rb +4 -0
  183. data/lib/nokogiri/xml/sax/document.rb +160 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  187. data/lib/nokogiri/xml/schema.rb +61 -0
  188. data/lib/nokogiri/xml/syntax_error.rb +38 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +48 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xsd/xmlparser/nokogiri.rb +71 -0
  195. data/tasks/test.rb +100 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +277 -0
  198. data/test/css/test_tokenizer.rb +183 -0
  199. data/test/css/test_xpath_visitor.rb +76 -0
  200. data/test/ffi/test_document.rb +35 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/exslt.xml +8 -0
  207. data/test/files/exslt.xslt +35 -0
  208. data/test/files/foo/foo.xsd +4 -0
  209. data/test/files/po.xml +32 -0
  210. data/test/files/po.xsd +66 -0
  211. data/test/files/shift_jis.html +10 -0
  212. data/test/files/shift_jis.xml +5 -0
  213. data/test/files/snuggles.xml +3 -0
  214. data/test/files/staff.dtd +10 -0
  215. data/test/files/staff.xml +59 -0
  216. data/test/files/staff.xslt +32 -0
  217. data/test/files/tlm.html +850 -0
  218. data/test/files/valid_bar.xml +2 -0
  219. data/test/helper.rb +136 -0
  220. data/test/html/sax/test_parser.rb +64 -0
  221. data/test/html/sax/test_parser_context.rb +48 -0
  222. data/test/html/test_builder.rb +164 -0
  223. data/test/html/test_document.rb +390 -0
  224. data/test/html/test_document_encoding.rb +77 -0
  225. data/test/html/test_document_fragment.rb +132 -0
  226. data/test/html/test_element_description.rb +94 -0
  227. data/test/html/test_named_characters.rb +14 -0
  228. data/test/html/test_node.rb +228 -0
  229. data/test/html/test_node_encoding.rb +27 -0
  230. data/test/test_convert_xpath.rb +135 -0
  231. data/test/test_css_cache.rb +45 -0
  232. data/test/test_gc.rb +15 -0
  233. data/test/test_memory_leak.rb +77 -0
  234. data/test/test_nokogiri.rb +134 -0
  235. data/test/test_reader.rb +358 -0
  236. data/test/test_xslt_transforms.rb +131 -0
  237. data/test/xml/node/test_save_options.rb +20 -0
  238. data/test/xml/node/test_subclass.rb +44 -0
  239. data/test/xml/sax/test_parser.rb +307 -0
  240. data/test/xml/sax/test_parser_context.rb +56 -0
  241. data/test/xml/sax/test_push_parser.rb +131 -0
  242. data/test/xml/test_attr.rb +38 -0
  243. data/test/xml/test_attribute_decl.rb +82 -0
  244. data/test/xml/test_builder.rb +167 -0
  245. data/test/xml/test_cdata.rb +38 -0
  246. data/test/xml/test_comment.rb +29 -0
  247. data/test/xml/test_document.rb +607 -0
  248. data/test/xml/test_document_encoding.rb +26 -0
  249. data/test/xml/test_document_fragment.rb +138 -0
  250. data/test/xml/test_dtd.rb +82 -0
  251. data/test/xml/test_dtd_encoding.rb +33 -0
  252. data/test/xml/test_element_content.rb +56 -0
  253. data/test/xml/test_element_decl.rb +73 -0
  254. data/test/xml/test_entity_decl.rb +83 -0
  255. data/test/xml/test_entity_reference.rb +21 -0
  256. data/test/xml/test_namespace.rb +68 -0
  257. data/test/xml/test_node.rb +889 -0
  258. data/test/xml/test_node_attributes.rb +34 -0
  259. data/test/xml/test_node_encoding.rb +107 -0
  260. data/test/xml/test_node_set.rb +531 -0
  261. data/test/xml/test_parse_options.rb +52 -0
  262. data/test/xml/test_processing_instruction.rb +30 -0
  263. data/test/xml/test_reader_encoding.rb +126 -0
  264. data/test/xml/test_relax_ng.rb +60 -0
  265. data/test/xml/test_schema.rb +89 -0
  266. data/test/xml/test_syntax_error.rb +27 -0
  267. data/test/xml/test_text.rb +30 -0
  268. data/test/xml/test_unparented_node.rb +381 -0
  269. data/test/xml/test_xpath.rb +106 -0
  270. metadata +430 -0
@@ -0,0 +1,7 @@
require 'nokogiri/syntax_error'

module Nokogiri
  module CSS
    # Syntax error class for the CSS namespace. It adds nothing of its own and
    # exists as a distinct subclass of ::Nokogiri::SyntaxError.
    class SyntaxError < ::Nokogiri::SyntaxError; end
  end
end
@@ -0,0 +1,7 @@
module Nokogiri
  module CSS
    # GeneratedTokenizer with +scan+ exposed as another name for the
    # inherited +scan_setup+.
    class Tokenizer < GeneratedTokenizer
      alias_method :scan, :scan_setup
    end
  end
end
@@ -0,0 +1,54 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class GeneratedTokenizer < GeneratedParser
4
+
5
+ macro
6
+ nl \n|\r\n|\r|\f
7
+ w [\s\r\n\f]*
8
+ nonascii [^\0-\177]
9
+ num -?([0-9]+|[0-9]*\.[0-9]+)
10
+ unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?
11
+
12
+ escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
13
+ nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
14
+ nmstart [_A-Za-z]|{nonascii}|{escape}
15
+ ident [-@]?({nmstart})({nmchar})*
16
+ name ({nmchar})+
17
+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*"
18
+ string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*'
19
+ string {string1}|{string2}
20
+
21
+ rule
22
+
23
+ # [:state] pattern [actions]
24
+
25
+ {ident}\(\s* { [:FUNCTION, text] }
26
+ {ident} { [:IDENT, text] }
27
+ \#{name} { [:HASH, text] }
28
+ {w}~={w} { [:INCLUDES, text] }
29
+ {w}\|={w} { [:DASHMATCH, text] }
30
+ {w}\^={w} { [:PREFIXMATCH, text] }
31
+ {w}\$={w} { [:SUFFIXMATCH, text] }
32
+ {w}\*={w} { [:SUBSTRINGMATCH, text] }
33
+ {w}!={w} { [:NOT_EQUAL, text] }
34
+ {w}={w} { [:EQUAL, text] }
35
+ {w}\) { [:RPAREN, text] }
36
+ {w}\[{w} { [:LSQUARE, text] }
37
+ {w}\] { [:RSQUARE, text] }
38
+ {w}\+{w} { [:PLUS, text] }
39
+ {w}>{w} { [:GREATER, text] }
40
+ {w},{w} { [:COMMA, text] }
41
+ {w}~{w} { [:TILDE, text] }
42
+ \:not\({w} { [:NOT, text] }
43
+ {num} { [:NUMBER, text] }
44
+ {w}\/\/{w} { [:DOUBLESLASH, text] }
45
+ {w}\/{w} { [:SLASH, text] }
46
+
47
+ U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
48
+
49
+ [\s\t\r\n\f]+ { [:S, text] }
50
+ {string} { [:STRING, text] }
51
+ . { [text, text] }
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,162 @@
module Nokogiri
  module CSS
    # Turns the nodes of a parsed CSS selector into XPath expression
    # fragments. Every visit_* method takes a node whose +value+ is an array of
    # tokens and/or child nodes, and returns a String.
    class XPathVisitor # :nodoc:
      # Translates a function node (value.first is the function name including
      # its opening parenthesis, e.g. "nth-child(") into XPath.
      #
      # A visit_function_<name> method available on the visitor takes
      # precedence over the built-in translations below.
      def visit_function node
        # note that nth-child and nth-last-child are preprocessed in css/node.rb.
        msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
        return self.send(msg, node) if self.respond_to?(msg)

        function = node.value.first
        argument = node.value[1]

        case function
        when /^text\(/
          'child::text()'
        when /^self\(/
          "self::#{argument}"
        when /^(eq|nth|nth-of-type|nth-child)\(/
          if argument.is_a?(Nokogiri::CSS::Node) && argument.type == :AN_PLUS_B
            an_plus_b(argument)
          else
            "position() = " + argument
          end
        when /^(first|first-of-type)\(/
          "position() = 1"
        when /^(last|last-of-type)\(/
          "position() = last()"
        when /^(nth-last-child|nth-last-of-type)\(/
          nth_from_last(argument)
        when /^contains\(/
          "contains(., #{argument})"
        when /^gt\(/
          "position() > #{argument}"
        when /^only-child\(/
          "last() = 1"
        when /^comment\(/
          "comment()"
        else
          # Unknown function: pass it through with the context node as the
          # first argument. +function+ already ends with "(".
          args = ['.'] + node.value[1..-1]
          "#{function}#{args.join(', ')})"
        end
      end

      # Wraps the translated child in an XPath not().
      def visit_not node
        'not(' + node.value.first.accept(self) + ')'
      end

      # Emits "<last>[preceding-sibling::<first>]".
      def visit_preceding_selector node
        node.value.last.accept(self) +
          '[preceding-sibling::' +
          node.value.first.accept(self) +
          ']'
      end

      # Emits an @id comparison from a "#name" token. The id is empty when the
      # token does not start with "#".
      def visit_id node
        id = node.value.first[/^#(.*)$/, 1]
        "@id = '#{id}'"
      end

      # Translates an attribute test. With a single-element value only the
      # attribute expression is returned; with three elements
      # (attribute, operator, operand) a comparison is built.
      def visit_attribute_condition node
        target = node.value.first
        # Functions and axis-qualified names ("foo::bar") are used as-is;
        # everything else is an attribute reference.
        attribute = (target.type == :FUNCTION || target.value.first =~ /::/) ? '' : '@'
        attribute += target.accept(self)

        # Support non-standard css
        attribute.gsub!(/^@@/, '@')

        return attribute unless node.value.length == 3

        value = node.value.last
        # Quote the operand unless it already starts with a quote character.
        value = "'#{value}'" if value !~ /^['"]/

        case node.value[1]
        when :equal
          "#{attribute} = #{value}"
        when :not_equal
          "#{attribute} != #{value}"
        when :substring_match
          "contains(#{attribute}, #{value})"
        when :prefix_match
          "starts-with(#{attribute}, #{value})"
        when :dash_match
          "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
        when :includes
          "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
        when :suffix_match
          "substring(#{attribute}, string-length(#{attribute}) - " +
            "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
        else
          "#{attribute} #{node.value[1]} #{value}"
        end
      end

      # Translates a pseudo class. Function nodes are delegated to their own
      # visit; a visit_pseudo_class_<name> method on the visitor wins over the
      # built-in table; unknown names become an XPath call "<name>(.)".
      def visit_pseudo_class node
        pseudo = node.value.first
        if pseudo.is_a?(Nokogiri::CSS::Node) && pseudo.type == :FUNCTION
          pseudo.accept(self)
        else
          msg = :"visit_pseudo_class_#{pseudo.gsub(/[(]/, '')}"
          return self.send(msg, node) if self.respond_to?(msg)

          case pseudo
          when "first"         then "position() = 1"
          when "last"          then "position() = last()"
          when "first-of-type" then "position() = 1"
          when "last-of-type"  then "position() = last()"
          when "only-of-type"  then "last() = 1"
          when "empty"         then "not(node())"
          when "parent"        then "node()"
          when "root"          then "not(parent::*)"
          else
            pseudo + "(.)"
          end
        end
      end

      # Matches the class name as a whole, space-delimited word of @class.
      def visit_class_condition node
        "contains(concat(' ', @class, ' '), ' #{node.value.first} ')"
      end

      # Binary selectors: translated left side, a fixed separator, translated
      # right side.
      {
        'combinator'                => ' and ',
        'direct_adjacent_selector'  => "/following-sibling::*[1]/self::",
        'descendant_selector'       => '//',
        'child_selector'            => '/',
      }.each do |kind, separator|
        define_method("visit_#{kind}") do |node|
          "#{node.value.first.accept(self)}#{separator}#{node.value.last.accept(self)}"
        end
      end

      # Emits "<first>[<last>]".
      def visit_conditional_selector node
        node.value.first.accept(self) + '[' +
          node.value.last.accept(self) + ']'
      end

      # The element name token is used verbatim.
      def visit_element_name node
        node.value.first
      end

      # Entry point: asks +node+ to dispatch back into this visitor.
      def accept node
        node.accept(self)
      end

      private

      # Translates an an+b node (four tokens; a is value[0], b is value[3])
      # into a position() test.
      #
      # Raises ArgumentError when the node does not hold exactly four tokens.
      def an_plus_b node
        raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4

        a = node.value[0].to_i
        b = node.value[3].to_i

        if b == 0
          "(position() mod #{a}) = 0"
        else
          compare = (a < 0) ? "<=" : ">="
          "(position() #{compare} #{b}) and (((position()-#{b}) mod #{a.abs}) = 0)"
        end
      end

      # Position test for the n-th element counted from the end.
      #
      # CSS counts from 1, so n = 1 is the last element, i.e. an offset of
      # n - 1 from last(). Arguments that are not plain decimal integers are
      # interpolated unchanged.
      def nth_from_last argument
        return "position() = last() - #{argument}" unless argument.to_s =~ /\A\d+\z/

        offset = argument.to_i - 1
        offset.zero? ? "position() = last()" : "position() = last() - #{offset}"
      end

    end
  end
end
@@ -0,0 +1,33 @@
module Nokogiri
  module Decorators
    ###
    # The Slop decorator implements method_missing so that plain method calls
    # may be used instead of XPath or CSS. See Nokogiri.Slop
    module Slop
      ###
      # Look for nodes named +name+. See Nokogiri.Slop
      #
      # The search depends on the first argument:
      # * none: the XPath "./name"
      # * a Hash with :css: the CSS rule formed by appending the value to +name+
      # * a Hash with :xpath: "./name[...]" with the conditions joined by " and "
      # * anything else: appended to +name+ as a CSS rule, converted to XPath
      #   with a "./" prefix (bypassing the CSS parser cache)
      #
      # Returns the only match, or the whole list when there are several.
      # Defers to +super+ when nothing matched or when a Hash carried neither
      # :css nor :xpath.
      def method_missing name, *args, &block
        # Start from an empty result so an unrecognized Hash falls through to
        # +super+ instead of calling #empty? on nil.
        list = []

        if args.empty?
          list = xpath("./#{name}")
        elsif args.first.is_a? Hash
          hash = args.first
          if hash[:css]
            list = css("#{name}#{hash[:css]}")
          elsif hash[:xpath]
            conds = Array(hash[:xpath]).join(' and ')
            list = xpath("./#{name}[#{conds}]")
          end
        else
          CSS::Parser.without_cache do
            list = xpath(
              *CSS.xpath_for("#{name}#{args.first}", :prefix => "./")
            )
          end
        end

        super if list.empty?
        list.length == 1 ? list.first : list
      end
    end
  end
end
@@ -0,0 +1,28 @@
module Nokogiri
  module HTML
    # FFI-backed pieces of HTML::Document: construction and parsing through
    # LibXML's html* functions.
    class Document < XML::Document

      attr_accessor :cstruct # :nodoc:

      # Creates a document via LibXML.htmlNewDoc from the first two arguments
      # (uri and external id), wraps it, then runs #initialize with all
      # arguments.
      def self.new(*args) # :nodoc:
        uri         = args[0]
        external_id = args[1]
        doc = wrap(LibXML.htmlNewDoc(uri, external_id))
        doc.send :initialize, *args
        doc
      end

      # Parses HTML pulled from +io+ through an IoCallbacks reader.
      def self.read_io(io, url, encoding, options) # :nodoc:
        wrap_with_error_handling do
          LibXML.htmlReadIO(IoCallbacks.reader(io), nil, nil, url, encoding, options)
        end
      end

      # Parses HTML held in +string+.
      def self.read_memory(string, url, encoding, options) # :nodoc:
        # The size handed to libxml is a byte count; String#length counts
        # characters on Ruby 1.9+ and would cut multibyte input short.
        byte_length = string.respond_to?(:bytesize) ? string.bytesize : string.length
        wrap_with_error_handling do
          LibXML.htmlReadMemory(string, byte_length, url, encoding, options)
        end
      end
    end
  end
end
@@ -0,0 +1,85 @@
module Nokogiri
  module HTML
    # FFI-backed accessors for an HTML element description. All values are
    # read from +cstruct+, a LibXML::HtmlElemDesc when obtained through
    # ElementDescription[].
    class ElementDescription

      attr_accessor :cstruct # :nodoc:

      def required_attributes # :nodoc:
        get_string_array_from :attrs_req
      end

      def deprecated_attributes # :nodoc:
        get_string_array_from :attrs_depr
      end

      def optional_attributes # :nodoc:
        get_string_array_from :attrs_opt
      end

      def default_sub_element # :nodoc:
        cstruct[:defaultsubelt]
      end

      def sub_elements # :nodoc:
        get_string_array_from :subelts
      end

      def description # :nodoc:
        cstruct[:desc]
      end

      # The flag fields below are integers; any non-zero value counts as true.

      def inline? # :nodoc:
        cstruct[:isinline] != 0
      end

      def deprecated? # :nodoc:
        cstruct[:depr] != 0
      end

      def empty? # :nodoc:
        cstruct[:empty] != 0
      end

      def save_end_tag? # :nodoc:
        cstruct[:saveEndTag] != 0
      end

      def implied_end_tag? # :nodoc:
        cstruct[:endTag] != 0
      end

      def implied_start_tag? # :nodoc:
        cstruct[:startTag] != 0
      end

      def name # :nodoc:
        cstruct[:name]
      end

      # Looks up +tag_name+ with LibXML.htmlTagLookup. Returns nil when the
      # lookup yields a null pointer, otherwise a description wrapping the
      # result.
      def self.[](tag_name) # :nodoc:
        ptr = LibXML.htmlTagLookup(tag_name)
        return nil if ptr.null?

        desc = allocate
        desc.cstruct = LibXML::HtmlElemDesc.new(ptr)
        desc
      end

      private

      # Reads the array of string pointers stored in field +sym+, stopping at
      # the first missing or null entry. Returns an Array of Strings, empty
      # when the field itself is null.
      def get_string_array_from(sym) # :nodoc:
        array_ptr = cstruct[sym]
        return [] if array_ptr.null?

        # Neither the field nor the pointer width changes while walking the
        # array, so look them up once.
        pointer_size = FFI.type_size(:pointer)
        strings = []
        index = 0
        loop do
          entry = array_ptr.get_pointer(index * pointer_size)
          break if !entry || entry.null?
          strings << entry.read_string
          index += 1
        end

        strings
      end

    end
  end
end
@@ -0,0 +1,16 @@
module Nokogiri
  module HTML
    class EntityLookup

      # Resolves +key+ (converted with to_s) through LibXML.htmlEntityLookup.
      # Returns nil when the lookup yields a null pointer, otherwise an
      # EntityDescription built from the struct's :value, :name and :desc
      # fields.
      def get(key) # :nodoc:
        entity_ptr = LibXML.htmlEntityLookup(key.to_s)
        unless entity_ptr.null?
          entity = LibXML::HtmlEntityDesc.new(entity_ptr)
          EntityDescription.new(entity[:value], entity[:name], entity[:desc])
        end
      end

    end
  end
end
@@ -0,0 +1,38 @@
# :stopdoc:
module Nokogiri
  module HTML
    module SAX
      # FFI-backed HTML SAX parser context: wraps the context pointers
      # returned by LibXML's htmlCreate*ParserCtxt functions.
      class ParserContext < Nokogiri::XML::SAX::ParserContext
        attr_accessor :cstruct

        # Builds a context from LibXML.htmlCreateFileParserCtxt for
        # +filename+ and +encoding+.
        def self.file filename, encoding
          ctx = LibXML.htmlCreateFileParserCtxt filename, encoding
          pc = allocate
          pc.cstruct = LibXML::XmlParserContext.new ctx
          pc
        end

        # Builds a context over the in-memory document +data+.
        #
        # Raises ArgumentError when +data+ is nil or false and RuntimeError
        # when it is empty. When +encoding+ is given and LibXML recognizes it,
        # the context is switched to that encoding.
        def self.memory data, encoding
          raise ArgumentError unless data
          raise "data cannot be empty" unless data.length > 0

          # libxml expects the size in bytes; String#length counts characters
          # on Ruby 1.9+ and would cut multibyte input short.
          byte_length = data.respond_to?(:bytesize) ? data.bytesize : data.length
          ctx = LibXML.htmlCreateMemoryParserCtxt data, byte_length
          pc = allocate
          pc.cstruct = LibXML::XmlParserContext.new ctx
          if encoding
            enc = LibXML.xmlParseCharEncoding(encoding)
            if enc != LibXML::XML_CHAR_ENCODING_ERROR
              LibXML.xmlSwitchEncoding(ctx, enc)
            end
          end
          pc
        end

        # Same as the inherited parse_with, with +type+ defaulting to :html.
        def parse_with sax_handler, type = :html
          super
        end
      end
    end
  end
end
# :startdoc:
@@ -0,0 +1,42 @@
# :stopdoc:
module Nokogiri
  # Builds the lambdas used as read and write callbacks around a Ruby IO-like
  # object. Each lambda takes (context, buffer, length).
  module IoCallbacks

    class << self

      # Reader that pulls up to +len+ from io.read and copies the result into
      # +buffer+ with put_bytes. Yields 0 once io.read returns nil, otherwise
      # the length of the string that was read.
      def plain_old_reader(io)
        lambda do |_ctx, buffer, len|
          chunk = io.read(len)
          if chunk.nil?
            0
          else
            buffer.put_bytes(0, chunk, 0, chunk.length)
            chunk.length
          end
        end
      end

      if defined?(FFI::IO.native_read)
        # Reader based on FFI::IO.native_read; StringIO objects fall back to
        # plain_old_reader. Negative return codes are reported as 0.
        def ffi_io_native_reader(io)
          return plain_old_reader(io) if io.is_a?(StringIO)

          lambda do |_ctx, buffer, len|
            bytes_read = FFI::IO.native_read(io, buffer, len)
            [bytes_read, 0].max
          end
        end
        alias :reader :ffi_io_native_reader
      else
        alias :reader :plain_old_reader
      end

      # Writer that passes +buffer+ to io.write and reports +len+ back.
      def writer(io)
        lambda do |_context, buffer, len|
          io.write buffer
          len
        end
      end
    end

  end
end
# :startdoc: