nokogiri 1.2.3-x86-mswin32-60 → 1.4.5-x86-mswin32-60

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (319) hide show
  1. data/.autotest +18 -7
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.ja.rdoc +297 -3
  4. data/CHANGELOG.rdoc +289 -0
  5. data/Manifest.txt +148 -37
  6. data/README.ja.rdoc +20 -20
  7. data/README.rdoc +53 -22
  8. data/Rakefile +127 -211
  9. data/bin/nokogiri +54 -0
  10. data/ext/nokogiri/depend +358 -0
  11. data/ext/nokogiri/extconf.rb +89 -54
  12. data/ext/nokogiri/html_document.c +34 -27
  13. data/ext/nokogiri/html_document.h +1 -1
  14. data/ext/nokogiri/html_element_description.c +276 -0
  15. data/ext/nokogiri/html_element_description.h +10 -0
  16. data/ext/nokogiri/html_entity_lookup.c +7 -5
  17. data/ext/nokogiri/html_entity_lookup.h +1 -1
  18. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  19. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  20. data/ext/nokogiri/{native.c → nokogiri.c} +31 -7
  21. data/ext/nokogiri/{native.h → nokogiri.h} +68 -41
  22. data/ext/nokogiri/xml_attr.c +20 -9
  23. data/ext/nokogiri/xml_attr.h +1 -1
  24. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  25. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  26. data/ext/nokogiri/xml_cdata.c +21 -9
  27. data/ext/nokogiri/xml_cdata.h +1 -1
  28. data/ext/nokogiri/xml_comment.c +18 -6
  29. data/ext/nokogiri/xml_comment.h +1 -1
  30. data/ext/nokogiri/xml_document.c +247 -68
  31. data/ext/nokogiri/xml_document.h +5 -3
  32. data/ext/nokogiri/xml_document_fragment.c +15 -7
  33. data/ext/nokogiri/xml_document_fragment.h +1 -1
  34. data/ext/nokogiri/xml_dtd.c +110 -10
  35. data/ext/nokogiri/xml_dtd.h +3 -1
  36. data/ext/nokogiri/xml_element_content.c +123 -0
  37. data/ext/nokogiri/xml_element_content.h +10 -0
  38. data/ext/nokogiri/xml_element_decl.c +69 -0
  39. data/ext/nokogiri/xml_element_decl.h +9 -0
  40. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  41. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  42. data/ext/nokogiri/xml_entity_decl.c +110 -0
  43. data/ext/nokogiri/xml_entity_decl.h +10 -0
  44. data/ext/nokogiri/xml_entity_reference.c +16 -5
  45. data/ext/nokogiri/xml_entity_reference.h +1 -1
  46. data/ext/nokogiri/xml_io.c +40 -8
  47. data/ext/nokogiri/xml_io.h +2 -1
  48. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  49. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  50. data/ext/nokogiri/xml_namespace.c +84 -0
  51. data/ext/nokogiri/xml_namespace.h +13 -0
  52. data/ext/nokogiri/xml_node.c +782 -225
  53. data/ext/nokogiri/xml_node.h +2 -4
  54. data/ext/nokogiri/xml_node_set.c +253 -34
  55. data/ext/nokogiri/xml_node_set.h +2 -2
  56. data/ext/nokogiri/xml_processing_instruction.c +17 -5
  57. data/ext/nokogiri/xml_processing_instruction.h +1 -1
  58. data/ext/nokogiri/xml_reader.c +277 -85
  59. data/ext/nokogiri/xml_reader.h +1 -1
  60. data/ext/nokogiri/xml_relax_ng.c +168 -0
  61. data/ext/nokogiri/xml_relax_ng.h +9 -0
  62. data/ext/nokogiri/xml_sax_parser.c +183 -111
  63. data/ext/nokogiri/xml_sax_parser.h +30 -1
  64. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  65. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  66. data/ext/nokogiri/xml_sax_push_parser.c +42 -12
  67. data/ext/nokogiri/xml_sax_push_parser.h +1 -1
  68. data/ext/nokogiri/xml_schema.c +205 -0
  69. data/ext/nokogiri/xml_schema.h +9 -0
  70. data/ext/nokogiri/xml_syntax_error.c +28 -173
  71. data/ext/nokogiri/xml_syntax_error.h +2 -1
  72. data/ext/nokogiri/xml_text.c +16 -6
  73. data/ext/nokogiri/xml_text.h +1 -1
  74. data/ext/nokogiri/xml_xpath_context.c +104 -47
  75. data/ext/nokogiri/xml_xpath_context.h +1 -1
  76. data/ext/nokogiri/xslt_stylesheet.c +161 -19
  77. data/ext/nokogiri/xslt_stylesheet.h +1 -1
  78. data/lib/nokogiri.rb +47 -8
  79. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  80. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  81. data/lib/nokogiri/css.rb +6 -3
  82. data/lib/nokogiri/css/node.rb +14 -12
  83. data/lib/nokogiri/css/parser.rb +665 -62
  84. data/lib/nokogiri/css/parser.y +20 -10
  85. data/lib/nokogiri/css/parser_extras.rb +91 -0
  86. data/lib/nokogiri/css/tokenizer.rb +148 -5
  87. data/lib/nokogiri/css/tokenizer.rex +10 -9
  88. data/lib/nokogiri/css/xpath_visitor.rb +47 -44
  89. data/lib/nokogiri/decorators/slop.rb +8 -4
  90. data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
  91. data/lib/nokogiri/ffi/html/document.rb +28 -0
  92. data/lib/nokogiri/ffi/html/element_description.rb +81 -0
  93. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  94. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  95. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  96. data/lib/nokogiri/ffi/libxml.rb +420 -0
  97. data/lib/nokogiri/ffi/structs/common_node.rb +38 -0
  98. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  99. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  100. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_attr.rb +20 -0
  102. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  103. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  104. data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
  105. data/lib/nokogiri/ffi/structs/xml_document.rb +117 -0
  106. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  107. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  108. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  109. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  110. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  111. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  112. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  113. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  114. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  115. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +20 -0
  116. data/lib/nokogiri/ffi/structs/xml_parser_input.rb +19 -0
  117. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  118. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  119. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
  120. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  121. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  122. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  123. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +38 -0
  124. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  125. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  126. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  127. data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
  128. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  129. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  132. data/lib/nokogiri/ffi/xml/document.rb +174 -0
  133. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  134. data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
  135. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  136. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  137. data/lib/nokogiri/ffi/xml/entity_decl.rb +36 -0
  138. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  139. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  140. data/lib/nokogiri/ffi/xml/node.rb +559 -0
  141. data/lib/nokogiri/ffi/xml/node_set.rb +150 -0
  142. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  143. data/lib/nokogiri/ffi/xml/reader.rb +236 -0
  144. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  145. data/lib/nokogiri/ffi/xml/sax/parser.rb +143 -0
  146. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +79 -0
  147. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +51 -0
  148. data/lib/nokogiri/ffi/xml/schema.rb +109 -0
  149. data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
  150. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  151. data/lib/nokogiri/ffi/xml/xpath.rb +9 -0
  152. data/lib/nokogiri/ffi/xml/xpath_context.rb +153 -0
  153. data/lib/nokogiri/ffi/xslt/stylesheet.rb +77 -0
  154. data/lib/nokogiri/html.rb +13 -47
  155. data/lib/nokogiri/html/builder.rb +27 -1
  156. data/lib/nokogiri/html/document.rb +201 -7
  157. data/lib/nokogiri/html/document_fragment.rb +41 -0
  158. data/lib/nokogiri/html/element_description.rb +23 -0
  159. data/lib/nokogiri/html/entity_lookup.rb +2 -0
  160. data/lib/nokogiri/html/sax/parser.rb +34 -3
  161. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  162. data/lib/nokogiri/nokogiri.rb +1 -0
  163. data/lib/nokogiri/version.rb +40 -1
  164. data/lib/nokogiri/version_warning.rb +14 -0
  165. data/lib/nokogiri/xml.rb +32 -53
  166. data/lib/nokogiri/xml/attr.rb +5 -0
  167. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  168. data/lib/nokogiri/xml/builder.rb +349 -29
  169. data/lib/nokogiri/xml/cdata.rb +3 -1
  170. data/lib/nokogiri/xml/character_data.rb +7 -0
  171. data/lib/nokogiri/xml/document.rb +166 -14
  172. data/lib/nokogiri/xml/document_fragment.rb +76 -1
  173. data/lib/nokogiri/xml/dtd.rb +16 -3
  174. data/lib/nokogiri/xml/element_content.rb +36 -0
  175. data/lib/nokogiri/xml/element_decl.rb +13 -0
  176. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  177. data/lib/nokogiri/xml/namespace.rb +13 -0
  178. data/lib/nokogiri/xml/node.rb +561 -166
  179. data/lib/nokogiri/xml/node/save_options.rb +22 -2
  180. data/lib/nokogiri/xml/node_set.rb +202 -40
  181. data/lib/nokogiri/xml/parse_options.rb +93 -0
  182. data/lib/nokogiri/xml/pp.rb +2 -0
  183. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  184. data/lib/nokogiri/xml/pp/node.rb +56 -0
  185. data/lib/nokogiri/xml/processing_instruction.rb +2 -0
  186. data/lib/nokogiri/xml/reader.rb +93 -8
  187. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  188. data/lib/nokogiri/xml/sax.rb +1 -7
  189. data/lib/nokogiri/xml/sax/document.rb +107 -2
  190. data/lib/nokogiri/xml/sax/parser.rb +57 -7
  191. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  192. data/lib/nokogiri/xml/sax/push_parser.rb +13 -1
  193. data/lib/nokogiri/xml/schema.rb +63 -0
  194. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  195. data/lib/nokogiri/xml/text.rb +4 -1
  196. data/lib/nokogiri/xml/xpath.rb +1 -1
  197. data/lib/nokogiri/xml/xpath/syntax_error.rb +3 -0
  198. data/lib/nokogiri/xml/xpath_context.rb +2 -0
  199. data/lib/nokogiri/xslt.rb +26 -2
  200. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  201. data/lib/xsd/xmlparser/nokogiri.rb +45 -9
  202. data/tasks/cross_compile.rb +173 -0
  203. data/tasks/test.rb +25 -69
  204. data/test/css/test_nthiness.rb +3 -4
  205. data/test/css/test_parser.rb +75 -20
  206. data/test/css/test_tokenizer.rb +23 -1
  207. data/test/css/test_xpath_visitor.rb +10 -1
  208. data/test/decorators/test_slop.rb +16 -0
  209. data/test/ffi/test_document.rb +35 -0
  210. data/test/files/2ch.html +108 -0
  211. data/test/files/address_book.rlx +12 -0
  212. data/test/files/address_book.xml +10 -0
  213. data/test/files/bar/bar.xsd +4 -0
  214. data/test/files/encoding.html +82 -0
  215. data/test/files/encoding.xhtml +84 -0
  216. data/test/files/foo/foo.xsd +4 -0
  217. data/test/files/po.xml +32 -0
  218. data/test/files/po.xsd +66 -0
  219. data/test/files/shift_jis.html +10 -0
  220. data/test/files/shift_jis.xml +5 -0
  221. data/test/files/snuggles.xml +3 -0
  222. data/test/files/staff.dtd +10 -0
  223. data/test/files/valid_bar.xml +2 -0
  224. data/test/helper.rb +101 -23
  225. data/test/html/sax/test_parser.rb +81 -2
  226. data/test/html/sax/test_parser_context.rb +48 -0
  227. data/test/html/test_builder.rb +39 -8
  228. data/test/html/test_document.rb +186 -23
  229. data/test/html/test_document_encoding.rb +78 -1
  230. data/test/html/test_document_fragment.rb +253 -0
  231. data/test/html/test_element_description.rb +98 -0
  232. data/test/html/test_named_characters.rb +1 -1
  233. data/test/html/test_node.rb +124 -36
  234. data/test/html/test_node_encoding.rb +27 -0
  235. data/test/test_convert_xpath.rb +1 -52
  236. data/test/test_css_cache.rb +2 -13
  237. data/test/test_encoding_handler.rb +46 -0
  238. data/test/test_memory_leak.rb +88 -19
  239. data/test/test_nokogiri.rb +38 -5
  240. data/test/test_reader.rb +188 -6
  241. data/test/test_soap4r_sax.rb +52 -0
  242. data/test/test_xslt_transforms.rb +183 -83
  243. data/test/xml/node/test_save_options.rb +1 -1
  244. data/test/xml/node/test_subclass.rb +44 -0
  245. data/test/xml/sax/test_parser.rb +175 -4
  246. data/test/xml/sax/test_parser_context.rb +113 -0
  247. data/test/xml/sax/test_push_parser.rb +90 -2
  248. data/test/xml/test_attr.rb +35 -1
  249. data/test/xml/test_attribute_decl.rb +82 -0
  250. data/test/xml/test_builder.rb +186 -1
  251. data/test/xml/test_cdata.rb +32 -1
  252. data/test/xml/test_comment.rb +13 -1
  253. data/test/xml/test_document.rb +415 -43
  254. data/test/xml/test_document_encoding.rb +1 -1
  255. data/test/xml/test_document_fragment.rb +173 -5
  256. data/test/xml/test_dtd.rb +61 -6
  257. data/test/xml/test_dtd_encoding.rb +3 -1
  258. data/test/xml/test_element_content.rb +56 -0
  259. data/test/xml/test_element_decl.rb +73 -0
  260. data/test/xml/test_entity_decl.rb +120 -0
  261. data/test/xml/test_entity_reference.rb +5 -1
  262. data/test/xml/test_namespace.rb +68 -0
  263. data/test/xml/test_node.rb +546 -201
  264. data/test/xml/test_node_attributes.rb +34 -0
  265. data/test/xml/test_node_encoding.rb +33 -3
  266. data/test/xml/test_node_reparenting.rb +321 -0
  267. data/test/xml/test_node_set.rb +538 -2
  268. data/test/xml/test_parse_options.rb +52 -0
  269. data/test/xml/test_processing_instruction.rb +6 -1
  270. data/test/xml/test_reader_encoding.rb +1 -1
  271. data/test/xml/test_relax_ng.rb +60 -0
  272. data/test/xml/test_schema.rb +94 -0
  273. data/test/xml/test_syntax_error.rb +12 -0
  274. data/test/xml/test_text.rb +35 -1
  275. data/test/xml/test_unparented_node.rb +5 -5
  276. data/test/xml/test_xpath.rb +142 -11
  277. data/test/xslt/test_custom_functions.rb +94 -0
  278. metadata +328 -92
  279. data/ext/nokogiri/html_sax_parser.c +0 -57
  280. data/ext/nokogiri/html_sax_parser.h +0 -11
  281. data/ext/nokogiri/iconv.dll +0 -0
  282. data/ext/nokogiri/libexslt.dll +0 -0
  283. data/ext/nokogiri/libxml2.dll +0 -0
  284. data/ext/nokogiri/libxslt.dll +0 -0
  285. data/ext/nokogiri/native.so +0 -0
  286. data/ext/nokogiri/xml_xpath.c +0 -53
  287. data/ext/nokogiri/xml_xpath.h +0 -11
  288. data/ext/nokogiri/zlib1.dll +0 -0
  289. data/lib/action-nokogiri.rb +0 -30
  290. data/lib/nokogiri/css/generated_parser.rb +0 -713
  291. data/lib/nokogiri/css/generated_tokenizer.rb +0 -144
  292. data/lib/nokogiri/decorators.rb +0 -2
  293. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  294. data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
  295. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
  296. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -28
  297. data/lib/nokogiri/hpricot.rb +0 -51
  298. data/lib/nokogiri/xml/comment.rb +0 -6
  299. data/lib/nokogiri/xml/element.rb +0 -6
  300. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  301. data/lib/nokogiri/xml/fragment_handler.rb +0 -34
  302. data/test/hpricot/files/basic.xhtml +0 -17
  303. data/test/hpricot/files/boingboing.html +0 -2266
  304. data/test/hpricot/files/cy0.html +0 -3653
  305. data/test/hpricot/files/immob.html +0 -400
  306. data/test/hpricot/files/pace_application.html +0 -1320
  307. data/test/hpricot/files/tenderlove.html +0 -16
  308. data/test/hpricot/files/uswebgen.html +0 -220
  309. data/test/hpricot/files/utf8.html +0 -1054
  310. data/test/hpricot/files/week9.html +0 -1723
  311. data/test/hpricot/files/why.xml +0 -19
  312. data/test/hpricot/load_files.rb +0 -11
  313. data/test/hpricot/test_alter.rb +0 -68
  314. data/test/hpricot/test_builder.rb +0 -20
  315. data/test/hpricot/test_parser.rb +0 -426
  316. data/test/hpricot/test_paths.rb +0 -15
  317. data/test/hpricot/test_preserved.rb +0 -77
  318. data/test/hpricot/test_xml.rb +0 -30
  319. data/test/test_gc.rb +0 -15
@@ -0,0 +1,18 @@
1
+ module Nokogiri
2
+ module XML
3
+ class Text < CharacterData
4
+
5
+ def self.new(string, document, *rest) # :nodoc:
6
+ node_ptr = LibXML.xmlNewText(string)
7
+ node_cstruct = LibXML::XmlNode.new(node_ptr)
8
+ node_cstruct[:doc] = document.cstruct[:doc]
9
+
10
+ node = Node.wrap(node_cstruct, self)
11
+ node.send :initialize, string, document, *rest
12
+ yield node if block_given?
13
+ node
14
+ end
15
+
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,9 @@
1
+ module Nokogiri
2
+ module XML
3
+ class XPath
4
+
5
+ attr_accessor :cstruct # :nodoc:
6
+
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,153 @@
1
+ module Nokogiri
2
+ module XML
3
+ class XPathContext
4
+
5
+ attr_accessor :cstruct # :nodoc:
6
+
7
+ def register_ns(prefix, uri) # :nodoc:
8
+ LibXML.xmlXPathRegisterNs(cstruct, prefix, uri)
9
+ end
10
+
11
+ def register_variable(name, value) # :nodoc:
12
+ xml_value = LibXML.xmlXPathNewCString(value);
13
+ LibXML.xmlXPathRegisterVariable(cstruct, name, xml_value);
14
+ end
15
+
16
+ def evaluate(search_path, xpath_handler=nil) # :nodoc:
17
+ lookup = nil # to keep lambda in scope long enough to avoid a possible GC tragedy
18
+ query = search_path.to_s
19
+
20
+ if xpath_handler
21
+ lookup = lambda do |ctx, name, uri|
22
+ return nil unless xpath_handler.respond_to?(name)
23
+ ruby_funcall name, xpath_handler
24
+ end
25
+ LibXML.xmlXPathRegisterFuncLookup(cstruct, lookup, nil);
26
+ end
27
+
28
+ exception_handler = lambda do |ctx, error|
29
+ raise XPath::SyntaxError.wrap(error)
30
+ end
31
+ LibXML.xmlResetLastError()
32
+ LibXML.xmlSetStructuredErrorFunc(nil, exception_handler)
33
+
34
+ generic_exception_handler = lambda do |ctx, msg|
35
+ raise RuntimeError.new(msg) # TODO: varargs
36
+ end
37
+ LibXML.xmlSetGenericErrorFunc(nil, generic_exception_handler)
38
+
39
+ xpath_ptr = LibXML.xmlXPathEvalExpression(query, cstruct)
40
+
41
+ LibXML.xmlSetStructuredErrorFunc(nil, nil)
42
+ LibXML.xmlSetGenericErrorFunc(nil, nil)
43
+
44
+ if xpath_ptr.null?
45
+ error = LibXML.xmlGetLastError()
46
+ raise XPath::SyntaxError.wrap(error)
47
+ end
48
+
49
+ xpath = XML::XPath.new
50
+ xpath.cstruct = LibXML::XmlXpathObject.new(xpath_ptr)
51
+ xpath.document = cstruct.document.ruby_doc
52
+
53
+ case xpath.cstruct[:type]
54
+ when LibXML::XmlXpathObject::XPATH_NODESET
55
+ if xpath.cstruct[:nodesetval].null?
56
+ NodeSet.new(xpath.document)
57
+ else
58
+ NodeSet.wrap(xpath.cstruct[:nodesetval], xpath.document)
59
+ end
60
+ when LibXML::XmlXpathObject::XPATH_STRING
61
+ xpath.cstruct[:stringval]
62
+ when LibXML::XmlXpathObject::XPATH_NUMBER
63
+ xpath.cstruct[:floatval]
64
+ when LibXML::XmlXpathObject::XPATH_BOOLEAN
65
+ 0 != xpath.cstruct[:boolval]
66
+ else
67
+ NodeSet.new(xpath.document)
68
+ end
69
+ end
70
+
71
+ def self.new(node) # :nodoc:
72
+ LibXML.xmlXPathInit()
73
+
74
+ ptr = LibXML.xmlXPathNewContext(node.cstruct[:doc])
75
+
76
+ ctx = allocate
77
+ ctx.cstruct = LibXML::XmlXpathContext.new(ptr)
78
+ ctx.cstruct[:node] = node.cstruct
79
+ ctx
80
+ end
81
+
82
+ private
83
+
84
+ #
85
+ # returns a lambda that will call the handler function with marshalled parameters
86
+ #
87
+ def ruby_funcall(name, xpath_handler) # :nodoc:
88
+ lambda do |ctx, nargs|
89
+ parser_context = LibXML::XmlXpathParserContext.new(ctx)
90
+ context_cstruct = parser_context.context
91
+ document = context_cstruct.document.ruby_doc
92
+
93
+ params = []
94
+
95
+ nargs.times do |j|
96
+ obj = LibXML::XmlXpathObject.new(LibXML.valuePop(ctx))
97
+ case obj[:type]
98
+ when LibXML::XmlXpathObject::XPATH_STRING
99
+ params.unshift obj[:stringval]
100
+ when LibXML::XmlXpathObject::XPATH_BOOLEAN
101
+ params.unshift obj[:boolval] == 1
102
+ when LibXML::XmlXpathObject::XPATH_NUMBER
103
+ params.unshift obj[:floatval]
104
+ when LibXML::XmlXpathObject::XPATH_NODESET
105
+ params.unshift NodeSet.wrap(obj[:nodesetval], document)
106
+ else
107
+ char_ptr = params.unshift LibXML.xmlXPathCastToString(obj)
108
+ string = char_ptr.read_string
109
+ LibXML.xmlFree(char_ptr)
110
+ string
111
+ end
112
+ end
113
+
114
+ result = xpath_handler.send(name, *params)
115
+
116
+ case result.class.to_s
117
+ when Fixnum.to_s, Float.to_s, Bignum.to_s
118
+ LibXML.xmlXPathReturnNumber(ctx, result)
119
+ when String.to_s
120
+ LibXML.xmlXPathReturnString(
121
+ ctx,
122
+ LibXML.xmlXPathWrapCString(result)
123
+ )
124
+ when TrueClass.to_s
125
+ LibXML.xmlXPathReturnTrue(ctx)
126
+ when FalseClass.to_s
127
+ LibXML.xmlXPathReturnFalse(ctx)
128
+ when NilClass.to_s
129
+ ;
130
+ when Array.to_s
131
+ node_set = XML::NodeSet.new(document, result)
132
+ LibXML.xmlXPathReturnNodeSet(
133
+ ctx,
134
+ LibXML.xmlXPathNodeSetMerge(nil, node_set.cstruct)
135
+ )
136
+ else
137
+ if result.is_a?(XML::NodeSet)
138
+ LibXML.xmlXPathReturnNodeSet(
139
+ ctx,
140
+ LibXML.xmlXPathNodeSetMerge(nil, result.cstruct)
141
+ )
142
+ else
143
+ raise RuntimeError.new("Invalid return type #{result.class.inspect}")
144
+ end
145
+ end
146
+
147
+ nil
148
+ end # lambda
149
+ end # ruby_funcall
150
+
151
+ end
152
+ end
153
+ end
@@ -0,0 +1,77 @@
1
+ module Nokogiri
2
+ module XSLT
3
+ @modules = {}
4
+
5
+ @method_caller = lambda do |context, nargs|
6
+ # TODO
7
+ end
8
+
9
+ @init_func = lambda do |context, uri|
10
+ klass = @modules[uri]
11
+ klass.instance_methods(false).each do |method_name|
12
+ LibXML.xsltRegisterExtFunction(context, method_name, uri, @method_caller)
13
+ end
14
+ klass.new
15
+ end
16
+
17
+ @shutdown_func = lambda do |context, uri, data|
18
+ end
19
+
20
+ def self.register(uri, klass) # :nodoc:
21
+ raise NotImplementedError.new("sorry, you should implement me.")
22
+ end
23
+
24
+ class Stylesheet
25
+
26
+ attr_accessor :cstruct # :nodoc:
27
+
28
+ def self.parse_stylesheet_doc(document) # :nodoc:
29
+ LibXML.exsltRegisterAll
30
+
31
+ generic_exception_handler = lambda do |ctx, msg|
32
+ raise RuntimeError.new(msg) # TODO: varargs
33
+ end
34
+ LibXML.xsltSetGenericErrorFunc(nil, generic_exception_handler)
35
+
36
+ ss = LibXML.xsltParseStylesheetDoc(LibXML.xmlCopyDoc(document.cstruct, 1)) # 1 => recursive
37
+
38
+ LibXML.xsltSetGenericErrorFunc(nil, nil)
39
+
40
+ obj = allocate
41
+ obj.cstruct = LibXML::XsltStylesheet.new(ss)
42
+ obj
43
+ end
44
+
45
+ def serialize(document) # :nodoc:
46
+ buf_ptr = FFI::Buffer.new :pointer
47
+ buf_len = FFI::Buffer.new :int
48
+ LibXML.xsltSaveResultToString(buf_ptr, buf_len, document.cstruct, cstruct)
49
+ buf = Nokogiri::LibXML::XmlAlloc.new(buf_ptr.get_pointer(0))
50
+ buf.pointer.read_string(buf_len.get_int(0))
51
+ end
52
+
53
+ def transform(document, params=[]) # :nodoc:
54
+ unless document.kind_of? Nokogiri::XML::Document
55
+ raise ArgumentError, "argument must be a Nokogiri::XML::Document"
56
+ end
57
+
58
+ params = params.to_a.flatten if params.is_a?(Hash)
59
+ raise(TypeError) unless params.is_a?(Array)
60
+
61
+ param_arr = FFI::MemoryPointer.new(:pointer, params.length + 1, false)
62
+
63
+ # Keep the MemoryPointer instances alive until after the call
64
+ ptrs = params.map { |param | FFI::MemoryPointer.from_string(param.to_s) }
65
+ param_arr.put_array_of_pointer(0, ptrs)
66
+
67
+ # Terminate the list with a NULL pointer
68
+ param_arr.put_pointer(LibXML.pointer_offset(params.length), nil)
69
+
70
+ ptr = LibXML.xsltApplyStylesheet(cstruct, document.cstruct, param_arr)
71
+ raise(RuntimeError, "could not perform xslt transform on document") if ptr.null?
72
+
73
+ XML::Document.wrap(ptr)
74
+ end
75
+ end
76
+ end
77
+ end
data/lib/nokogiri/html.rb CHANGED
@@ -1,69 +1,35 @@
1
1
  require 'nokogiri/html/entity_lookup'
2
2
  require 'nokogiri/html/document'
3
+ require 'nokogiri/html/document_fragment'
4
+ require 'nokogiri/html/sax/parser_context'
3
5
  require 'nokogiri/html/sax/parser'
6
+ require 'nokogiri/html/element_description'
4
7
 
5
8
  module Nokogiri
6
9
  class << self
7
10
  ###
8
- # Parse HTML. +thing+ may be a String, or any object that
9
- # responds to _read_ and _close_ such as an IO, or StringIO.
10
- # +url+ is resource where this document is located. +encoding+ is the
11
- # encoding that should be used when processing the document. +options+
12
- # is a number that sets options in the parser, such as
13
- # Nokogiri::XML::PARSE_RECOVER. See the constants in
14
- # Nokogiri::XML.
15
- def HTML thing, url = nil, encoding = nil, options = 2145
16
- Nokogiri::HTML.parse(thing, url, encoding, options)
11
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
12
+ def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
13
+ Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
17
14
  end
18
15
  end
19
16
 
20
17
  module HTML
21
- # Parser options
22
- PARSE_NOERROR = 1 << 5 # No error reports
23
- PARSE_NOWARNING = 1 << 6 # No warnings
24
- PARSE_PEDANTIC = 1 << 7 # Pedantic errors
25
- PARSE_NOBLANKS = 1 << 8 # Remove blanks nodes
26
- PARSE_NONET = 1 << 11 # No network access
27
-
28
18
  class << self
29
19
  ###
30
- # Parse HTML. See Nokogiri.HTML.
31
- def parse string_or_io, url = nil, encoding = nil, options = 2145
32
- if string_or_io.respond_to?(:encoding)
33
- encoding ||= string_or_io.encoding.name
34
- end
35
-
36
- if string_or_io.respond_to?(:read)
37
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
38
- return Document.read_io(string_or_io, url, encoding, options)
39
- end
40
-
41
- return Document.new if(string_or_io.length == 0)
42
- Document.read_memory(string_or_io, url, encoding, options)
20
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
21
+ def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
22
+ Document.parse(thing, url, encoding, options, &block)
43
23
  end
44
24
 
45
25
  ####
46
26
  # Parse a fragment from +string+ in to a NodeSet.
47
- def fragment string
48
- doc = parse(string)
49
- fragment = XML::DocumentFragment.new(doc)
50
- finder = lambda { |c, f|
51
- c.each do |child|
52
- if string == child.content && child.name == 'text'
53
- fragment.add_child(child)
54
- end
55
- fragment.add_child(child) if string =~ /<#{child.name}/
56
- end
57
- return fragment if fragment.children.length > 0
58
-
59
- c.each do |child|
60
- finder.call(child.children, f)
61
- end
62
- }
63
- finder.call(doc.children, finder)
64
- fragment
27
+ def fragment string, encoding = nil
28
+ HTML::DocumentFragment.parse string, encoding
65
29
  end
66
30
  end
31
+
32
+ # Instance of Nokogiri::HTML::EntityLookup
67
33
  NamedCharacters = EntityLookup.new
68
34
  end
69
35
  end
@@ -1,6 +1,32 @@
1
1
  module Nokogiri
2
2
  module HTML
3
- class Builder < XML::Builder
3
+ ###
4
+ # Nokogiri HTML builder is used for building HTML documents. It is very
5
+ # similar to the Nokogiri::XML::Builder. In fact, you should go read the
6
+ # documentation for Nokogiri::XML::Builder before reading this
7
+ # documentation.
8
+ #
9
+ # == Synopsis:
10
+ #
11
+ # Create an HTML document with a body that has an onload attribute, and a
12
+ # span tag with a class of "bold" that has content of "Hello world".
13
+ #
14
+ # builder = Nokogiri::HTML::Builder.new do |doc|
15
+ # doc.html {
16
+ # doc.body(:onload => 'some_func();') {
17
+ # doc.span.bold {
18
+ # doc.text "Hello world"
19
+ # }
20
+ # }
21
+ # }
22
+ # end
23
+ # puts builder.to_html
24
+ #
25
+ # The HTML builder inherits from the XML builder, so make sure to read the
26
+ # Nokogiri::XML::Builder documentation.
27
+ class Builder < Nokogiri::XML::Builder
28
+ ###
29
+ # Convert the builder to HTML
4
30
  def to_html
5
31
  @doc.to_html
6
32
  end
@@ -1,14 +1,208 @@
1
1
  module Nokogiri
2
2
  module HTML
3
- class Document < XML::Document
3
+ class Document < Nokogiri::XML::Document
4
+ ###
5
+ # Get the meta tag encoding for this document. If there is no meta tag,
6
+ # then nil is returned.
7
+ def meta_encoding
8
+ meta = meta_content_type and
9
+ /charset\s*=\s*([\w-]+)/i.match(meta['content'])[1]
10
+ end
11
+
12
+ ###
13
+ # Set the meta tag encoding for this document. If there is no meta
14
+ # content tag, the encoding is not set.
15
+ def meta_encoding= encoding
16
+ meta = meta_content_type and
17
+ meta['content'] = "text/html; charset=%s" % encoding
18
+ end
19
+
20
+ def meta_content_type
21
+ css('meta[@http-equiv]').find { |node|
22
+ node['http-equiv'] =~ /\AContent-Type\z/i
23
+ }
24
+ end
25
+ private :meta_content_type
26
+
27
+ ###
28
+ # Get the title string of this document. Return nil if there is
29
+ # no title tag.
30
+ def title
31
+ title = at('title') and title.inner_text
32
+ end
33
+
34
+ ###
35
+ # Set the title string of this document. If there is no head
36
+ # element, the title is not set.
37
+ def title=(text)
38
+ unless title = at('title')
39
+ head = at('head') or return nil
40
+ title = Nokogiri::XML::Node.new('title', self)
41
+ head << title
42
+ end
43
+ title.children = XML::Text.new(text, self)
44
+ end
45
+
4
46
  ####
5
- # Serialize this Document with +encoding+ using +options+
6
- def serialize encoding = nil, options = XML::Node::SaveOptions::FORMAT |
7
- XML::Node::SaveOptions::AS_HTML |
8
- XML::Node::SaveOptions::NO_DECLARATION |
9
- XML::Node::SaveOptions::NO_EMPTY_TAGS
47
+ # Serialize Node using +options+. Save options can also be set using a
48
+ # block. See SaveOptions.
49
+ #
50
+ # These two statements are equivalent:
51
+ #
52
+ # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
53
+ #
54
+ # or
55
+ #
56
+ # node.serialize(:encoding => 'UTF-8') do |config|
57
+ # config.format.as_xml
58
+ # end
59
+ #
60
+ def serialize options = {}
61
+ options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
62
+ super
63
+ end
64
+
65
+ ####
66
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
67
+ def fragment tags = nil
68
+ DocumentFragment.new(self, tags, self.root)
69
+ end
70
+
71
+ class << self
72
+ ###
73
+ # Parse HTML. +thing+ may be a String, or any object that
74
+ # responds to _read_ and _close_ such as an IO, or StringIO.
75
+ # +url+ is resource where this document is located. +encoding+ is the
76
+ # encoding that should be used when processing the document. +options+
77
+ # is a number that sets options in the parser, such as
78
+ # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
79
+ # Nokogiri::XML::ParseOptions.
80
+ def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
81
+
82
+ options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
83
+ # Give the options to the user
84
+ yield options if block_given?
85
+
86
+ if string_or_io.respond_to?(:encoding)
87
+ unless string_or_io.encoding.name == "ASCII-8BIT"
88
+ encoding ||= string_or_io.encoding.name
89
+ end
90
+ end
91
+
92
+ if string_or_io.respond_to?(:read)
93
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
94
+ if !encoding
95
+ # Perform further encoding detection that libxml2 does
96
+ # not do.
97
+ string_or_io = EncodingReader.new(string_or_io)
98
+ begin
99
+ return read_io(string_or_io, url, encoding, options.to_i)
100
+ rescue EncodingFoundException => e
101
+ # A retry is required because libxml2 has a problem in
102
+ # that it cannot switch encoding well in the middle of
103
+ # parsing, especially if it has already seen a
104
+ # non-ASCII character when it finds an encoding hint.
105
+ encoding = e.encoding
106
+ end
107
+ end
108
+ return read_io(string_or_io, url, encoding, options.to_i)
109
+ end
110
+
111
+ # read_memory pukes on empty docs
112
+ return new if string_or_io.nil? or string_or_io.empty?
113
+
114
+ if !encoding
115
+ encoding = EncodingReader.detect_encoding(string_or_io)
116
+ end
117
+
118
+ read_memory(string_or_io, url, encoding, options.to_i)
119
+ end
120
+ end
121
+
122
+ class EncodingFoundException < Exception # :nodoc:
123
+ attr_reader :encoding
124
+
125
+ def initialize(encoding)
126
+ @encoding = encoding
127
+ super("encoding found: %s" % encoding)
128
+ end
129
+ end
130
+
131
+ class EncodingReader # :nodoc:
132
+ class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
133
+ attr_reader :encoding
134
+
135
+ def found(encoding)
136
+ @encoding = encoding
137
+ throw :found
138
+ end
139
+
140
+ def not_found(encoding)
141
+ found nil
142
+ end
143
+
144
+ def start_element(name, attrs = [])
145
+ case name
146
+ when /\A(?:div|h1|img|p|br)\z/
147
+ not_found
148
+ when 'meta'
149
+ attr = Hash[attrs]
150
+ http_equiv = attr['http-equiv'] and
151
+ http_equiv.match(/\AContent-Type\z/i) and
152
+ content = attr['content'] and
153
+ m = content.match(/;\s*charset\s*=\s*([\w-]+)/) and
154
+ found m[1]
155
+ end
156
+ end
157
+ end
158
+
159
+ def self.detect_encoding(chunk)
160
+ m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
161
+ return Nokogiri.XML(m[1]).encoding
162
+
163
+ handler = SAXHandler.new
164
+ parser = Nokogiri::HTML::SAX::Parser.new(handler)
165
+ catch(:found) {
166
+ parser.parse(chunk)
167
+ }
168
+ handler.encoding
169
+ rescue => e
170
+ nil
171
+ end
172
+
173
+ def initialize(io)
174
+ @io = io
175
+ @firstchunk = nil
176
+ end
177
+
178
+ def read(len)
179
+ # no support for a call without len
180
+
181
+ if !@firstchunk
182
+ @firstchunk = @io.read(len) or return nil
183
+
184
+ # This implementation expects and assumes that the first
185
+ # call from htmlReadIO() is made with a length long enough
186
+ # (~1KB) to achieve further encoding detection that
187
+ # libxml2 does not do.
188
+ if encoding = EncodingReader.detect_encoding(@firstchunk)
189
+ raise EncodingFoundException, encoding
190
+ end
191
+
192
+ # This chunk is stored for the next read in retry.
193
+ return @firstchunk
194
+ end
10
195
 
11
- super(encoding, options)
196
+ ret = @firstchunk.slice!(0, len)
197
+ if (len -= ret.length) > 0
198
+ rest = @io.read(len) and ret << rest
199
+ end
200
+ if ret.empty?
201
+ nil
202
+ else
203
+ ret
204
+ end
205
+ end
12
206
  end
13
207
  end
14
208
  end